I am building a training set for spam filtering using Java. The training set data
is given as words in text files in one folder, but I'm unable to debug what's going wrong with the Java collections.
/*
* The folder "part1" contains various text files whose names follow the formats
* *spmsg*.txt and *legit*.txt, e.g. 11927legit569.txt, 106127spmsgc26
* Each file has contents like: Subject: 5581 2005 47
* 1791 80 15184 64 2155 5581 2005 80 21292 10607 47 64
* 209 22910 132 1672 84 3865 84 1570 84 2005 84 10492 1847 12330 4797 82 13886 2170 47
*/
package naive_bayes;
import java.util.*;
import java.io.*;
/**
 * Builds the training tables for a naive Bayes spam filter from a folder of
 * text files containing whitespace-separated words.
 *
 * <p>Files whose name contains {@code "spmsg"} are treated as spam and files
 * whose name contains {@code "legit"} as legitimate mail. For each group a
 * word-frequency table is built, and every word seen is entered into a
 * dictionary labelled {@code "spam"} or {@code "legit"}. A word that occurs
 * in both groups ends up labelled {@code "spam"} because the spam labels are
 * written last.
 *
 * @author anuran mohanty
 * Indian institute of technology madras
 */
class Naivebayes
{
    /** Folder scanned by the no-argument constructor. */
    private static final String DEFAULT_DATA_DIR = "D:\\study\\ML\\Assignment_1\\Data\\part1";
    /** File-name fragment identifying spam messages. */
    private static final String SPAM_FLAG = "spmsg";
    /** File-name fragment identifying legitimate messages. */
    private static final String LEGIT_FLAG = "legit";

    /** Maps every known word to its label, "spam" or "legit". */
    private final Map<String, String> dictionary = new HashMap<String, String>();
    /** Frequency of each word across all spam files. */
    private final Map<String, Integer> spam = new HashMap<String, Integer>();
    /** Frequency of each word across all legitimate files. */
    private final Map<String, Integer> legit = new HashMap<String, Integer>();

    /**
     * Builds the tables from the default data folder.
     */
    public Naivebayes() {
        this(DEFAULT_DATA_DIR);
    }

    /**
     * Builds the tables from the given data folder.
     *
     * @param dataDir folder holding the spam and legit text files; a folder
     *                that is missing or unreadable yields empty tables
     */
    public Naivebayes(String dataDir) {
        // The private helper is used here so that a subclass overriding
        // textselector cannot run against a half-constructed object.
        countWords(readWords(dataDir, SPAM_FLAG), spam);
        countWords(readWords(dataDir, LEGIT_FLAG), legit);

        for (String word : legit.keySet()) {
            dictionary.put(word, "legit");
        }
        // Written second on purpose: a word present in both tables is kept as spam.
        for (String word : spam.keySet()) {
            dictionary.put(word, "spam");
        }
    }

    /**
     * Collects every whitespace-separated token from the regular files in
     * {@code path} whose name contains {@code flag}.
     *
     * @param path folder to scan (not recursive)
     * @param flag fragment a file name must contain to be read
     * @return all tokens in reading order, files visited in sorted path order;
     *         empty when the folder cannot be listed or no file matches
     */
    public ArrayList<String> textselector(String path, String flag)
    {
        return readWords(path, flag);
    }

    /**
     * Prints the labels stored in the dictionary (the values only, not the words).
     */
    public void ShowDictionary()
    {
        System.out.println("here it is " + dictionary.values());
    }

    /**
     * @return read-only view of the word-to-label dictionary
     */
    public Map<String, String> getDictionary() {
        return Collections.unmodifiableMap(dictionary);
    }

    /**
     * @return read-only view of the spam word frequencies
     */
    public Map<String, Integer> getSpamCounts() {
        return Collections.unmodifiableMap(spam);
    }

    /**
     * @return read-only view of the legitimate word frequencies
     */
    public Map<String, Integer> getLegitCounts() {
        return Collections.unmodifiableMap(legit);
    }

    /** Adds one occurrence to {@code counts} for every word in {@code words}. */
    private static void countWords(List<String> words, Map<String, Integer> counts) {
        for (String word : words) {
            Integer freq = counts.get(word);
            counts.put(word, (freq == null) ? 1 : freq + 1);
        }
    }

    /** Reads all tokens from the files in {@code path} whose name contains {@code flag}. */
    private static ArrayList<String> readWords(String path, String flag) {
        ArrayList<String> words = new ArrayList<String>();
        File folder = new File(path);
        File[] entries = folder.listFiles();
        if (entries == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list files in folder: " + folder);
            return words;
        }
        // The listing order is unspecified; sort so results are reproducible.
        Arrays.sort(entries);

        for (File entry : entries) {
            if (!entry.isFile() || !entry.getName().contains(flag)) {
                continue;
            }
            Scanner scan = null;
            try {
                // Open the listed File directly instead of rebuilding the path
                // with a hard-coded "\\" separator, which only works on Windows.
                scan = new Scanner(new BufferedReader(new FileReader(entry)));
                while (scan.hasNext()) {
                    words.add(scan.next());
                }
            } catch (IOException e) {
                // Best effort: report the unreadable file and carry on with the rest.
                System.err.println("Could not read " + entry + ": " + e);
            } finally {
                if (scan != null) {
                    scan.close(); // also closes the underlying readers
                }
            }
        }
        return words;
    }
}
/**
 * Program entry point for the spam-filter training run.
 */
public class Main {
    /**
     * Builds a {@link Naivebayes} instance and prints its dictionary.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        final Naivebayes classifier = new Naivebayes();
        classifier.ShowDictionary();
    }
}
// Actual output: "here it is []"