Hi everybody. I'm very new to programming, and I have this project that reads an input text file and writes the frequency of each lemma to an output file. I need to modify it so that it works with a directory of text files as input. I think I have to write a loop or something like that, but I don't know how :( What I've got so far is this:
/**
 * Counts how often each lemma / part-of-speech pair occurs in the input and
 * writes the result, most frequent first, to {@code text_frecv.txt}.
 *
 * <p>Each input line is split on single spaces into tokens, and each token is
 * split on {@code |}; field 1 is used as the lemma and field 2 as the part of
 * speech. Lemmas listed in {@code stopwords.inc} (one per line, compared
 * case-insensitively) are not counted. Tokens with fewer than three fields are
 * skipped.
 *
 * <p>The output file is only opened after all input has been read, so a failed
 * run does not leave an empty result file behind. I/O errors are reported with
 * a stack trace on standard error.
 *
 * @param args optional; {@code args[0]} may name a single input file or a
 *             directory whose regular files are all counted together. Without
 *             arguments the file {@code text_lema_pos.txt} is read.
 */
public static void main(String[] args) {
    String inputPath = (args != null && args.length > 0) ? args[0] : "text_lema_pos.txt";
    HashMap<String, Integer> hash = new HashMap<>();
    try {
        ArrayList<String> stopwords = readStopwords("stopwords.inc");
        for (String path : resolveInputFiles(inputPath)) {
            countLemmas(path, stopwords, hash);
        }
        writeFrequencies("text_frecv.txt", hash);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Reads the stopword list, one entry per line, from the given file. */
private static ArrayList<String> readStopwords(String path) throws IOException {
    ArrayList<String> stopwords = new ArrayList<>();
    try (FileReader in = new FileReader(path);
            BufferedReader reader = new BufferedReader(in)) {
        String line;
        while ((line = reader.readLine()) != null) {
            stopwords.add(line);
        }
    }
    return stopwords;
}

/**
 * Expands the input path into the list of files to read: the path itself when
 * it is not a directory, otherwise the regular files directly inside it
 * (sub-directories are not descended into), sorted by path.
 */
private static ArrayList<String> resolveInputFiles(String inputPath) throws IOException {
    ArrayList<String> paths = new ArrayList<>();
    java.io.File input = new java.io.File(inputPath);
    if (!input.isDirectory()) {
        // A missing file is reported later by FileReader, as before.
        paths.add(inputPath);
        return paths;
    }
    java.io.File[] children = input.listFiles();
    if (children == null) {
        throw new IOException("Cannot list directory: " + inputPath);
    }
    for (java.io.File child : children) {
        if (child.isFile()) {
            paths.add(child.getPath());
        }
    }
    // listFiles() gives no ordering guarantee; sort so runs are reproducible.
    Collections.sort(paths);
    return paths;
}

/** Adds the lemma/pos occurrences found in one file to the running counts. */
private static void countLemmas(String path, ArrayList<String> stopwords,
        HashMap<String, Integer> hash) throws IOException {
    try (FileReader in = new FileReader(path);
            BufferedReader reader = new BufferedReader(in)) {
        String line;
        while ((line = reader.readLine()) != null) {
            for (String token : line.split(" ")) {
                String[] fields = token.split("\\|");
                if (fields.length < 3) {
                    // Blank lines, double spaces and malformed tokens used to
                    // crash with ArrayIndexOutOfBoundsException; skip them.
                    continue;
                }
                String lemma = fields[1];
                if (isStopword(lemma, stopwords)) {
                    continue;
                }
                String key = lemma + " " + fields[2];
                Integer seen = hash.get(key);
                hash.put(key, seen == null ? 1 : seen + 1);
            }
        }
    }
}

/** Returns true when the lemma equals any stopword, ignoring case. */
private static boolean isStopword(String lemma, ArrayList<String> stopwords) {
    for (String stopword : stopwords) {
        if (lemma.equalsIgnoreCase(stopword)) {
            return true;
        }
    }
    return false;
}

/**
 * Writes one {@code <word .../>}-style line per lemma/pos pair, grouped by
 * descending frequency.
 */
private static void writeFrequencies(String path, HashMap<String, Integer> hash)
        throws IOException {
    ArrayList<Integer> values = new ArrayList<>(hash.values());
    Collections.sort(values, Collections.reverseOrder());
    try (FileWriter fstream = new FileWriter(path);
            BufferedWriter out = new BufferedWriter(fstream)) {
        int lastCount = -1;
        for (Integer value : values) {
            int count = value;
            if (count == lastCount) {
                // Every key with this count was already written.
                continue;
            }
            lastCount = count;
            for (String key : hash.keySet()) {
                // Compare as primitives: == on two Integer objects compares
                // references and misses equal counts outside the boxing cache.
                if (hash.get(key).intValue() == count) {
                    // Keys are built as lemma + " " + pos, so a space is always present.
                    int sep = key.indexOf(' ');
                    String lemma = key.substring(0, sep);
                    String pos = key.substring(sep + 1);
                    out.write("<word lemma=\"" + lemma + "\"" + " pos=\""
                            + pos + "\" frecv=\"" + count + "\">\n");
                }
            }
        }
    }
}