LUCENE issue

drogba123 0 Junior Poster
14 Years Ago
Hi,
currently i created a lucene project and currently having a problem with the codes. Here are my codes (which i also refer to some useful websites explain lucene)
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.index.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Scanner;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

/**
 * This terminal application creates an Apache Lucene index in a folder and adds files into this index
 * based on the input of the user.
 */
public class TextFileIndexer {

    //declare all the fields
    
    public static final String FIELD_PATH = "path";
    public static final String FIELD_CONTENTS = "contents";
    public static final String FIELD_ID = "docno";
    public static final String FIELD_DATE = "date";

    private IndexWriter writer;
    private ArrayList<File> queue = new ArrayList<File>();

    @SuppressWarnings("static-access")
    public static void main(String[] args) throws IOException, ParseException {

        String s = null;
        Scanner sc = new Scanner(System.in);
        //Menu selections
        int choice;
        do {
            System.out.println("Welcome Search Engine. Please choose" + " your selections below\n" + "(1) Build index collection\n" + "(2) Search for the documents ids\n" + "(3) Exit\n");

            System.out.print("Enter your choice: ");
            choice = sc.nextInt();

            //switch statements
            switch (choice) {

                case 1:

                    System.out.println("Enter the path where the index will be created: ");

                    BufferedReader br = new BufferedReader(
                            new InputStreamReader(System.in));
                    s = br.readLine();

                    TextFileIndexer indexer = null;
                    try {
                        indexer = new TextFileIndexer(s);
                    } catch (Exception ex) {
                        System.out.println("Cannot create index..." + ex.getMessage());
                        System.exit(-1);
                    }
                    try {
                        System.out.println("Enter the file or folder name to add into the index (q=quit):");
                        System.out.println("[Acceptable file types: .xml, .html, .html, .txt]");
                        s = br.readLine();
                        //try to add file into the index
                        indexer.indexFileOrDirectory(s);
                    } catch (Exception e) {
                        System.out.println("Error indexing " + s + " : " + e.getMessage());
                    }
                    indexer.closeIndex();

                    break;

                case 2:

                    System.out.println("your query?");
                    br = new BufferedReader(
                            new InputStreamReader(System.in));
                    String a = br.readLine();
                    
                   searchIndex(a);
                    break;
                case 3:

                    //exit the program
                    System.out.println("Program exiting..");
                    break;

                default:

                    //display invalid selection
                    System.err.println("Err: Invalid selection");
            }
        } while (choice != 3);

    }


    public TextFileIndexer(String index) throws IOException, ParseException {
        // the boolean true parameter means to create a new index everytime,
        // potentially overwriting any existing files there.
        writer = new IndexWriter(index, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    }

    public static void searchIndex(String words) throws IOException, ParseException{

        System.out.println("Searching for '" + words + "'"  );
        Directory directory = FSDirectory.getDirectory(INDEX_COLLECTION);
        IndexReader indexReader = IndexReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        Analyzer analyzer = new StandardAnalyzer();
        QueryParser queryParser = new QueryParser(FIELD_ID, analyzer);
        Query query = queryParser.parse(words);
        Hits hits = indexSearcher.search(query);
        System.out.println("Number of hits: " + hits.length());

        Iterator<Hit> it = hits.iterator();
        while (it.hasNext()) {
            Hit hit = it.next();
            Document document = hit.getDocument();
            String text = document.get(FIELD_ID);
            System.out.println("Hit: " + text);
        }

    }

    public void indexFileOrDirectory(String fileName) throws IOException {

        listFiles(new File(fileName));

        int originalNumDocs = writer.numDocs();
        for (File file : queue) {
            FileReader fr = null;
            try {
                Document doc = new Document();
                fr = new FileReader(file);
                doc.add(new Field(FIELD_CONTENTS, fr));

                String path = file.getCanonicalPath();
                doc.add(new Field(FIELD_PATH, path, Field.Store.YES, Field.Index.UN_TOKENIZED));

                String docno = file.getName();
                doc.add(new Field(FIELD_ID, docno, Field.Store.YES, Field.Index.UN_TOKENIZED));

                String date = file.getPath();
                doc.add(new Field(FIELD_DATE, date, Field.Store.YES, Field.Index.UN_TOKENIZED));

                writer.addDocument(doc);
                System.out.println("Added: " + file);


            } catch (Exception e) {
                System.out.println("Could not add: " + file);
            } finally {
                fr.close();
            }
        }

        int newNumDocs = writer.numDocs();
        System.out.println("");
        System.out.println("************************");
        System.out.println((newNumDocs - originalNumDocs) + " documents added.");
        System.out.println("************************");

        queue.clear();
    }

    private void listFiles(File file) {
        if (!file.exists()) {
            System.out.println(file + " does not exist.");
        }
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                listFiles(f);
            }
        } else {
            String filename = file.getName().toLowerCase();
          
            if (filename.endsWith(".htm") || filename.endsWith(".html") ||
                    filename.endsWith(".xml") || filename.endsWith(".txt")) {
                queue.add(file);
            } else {
                System.out.println("Skipped " + filename);
            }
        }
    }

    public void closeIndex() throws IOException {
        writer.optimize();
        writer.close();
    }

     
}
highlighted in red is the issue here, I would like to get the directory of the files and subfiles of from the index that i just built. do give a feedback or suggestions. Thanks.
Edited 14 Years Ago by drogba123 because: n/a
1 Contributor
0 Replies
65 Views
Be the first to reply
Be a part of the DaniWeb community
We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.