I am trying to write a sample program that can use the main method of "SequenceFilesFromDirectory", which aims to convert a set of files into sequence file format.
/**
 * Sample driver that invokes Mahout's {@code SequenceFilesFromDirectory} tool to
 * convert a directory of text documents into Hadoop SequenceFile format.
 *
 * NOTE(review): the Eclipse error "SequenceFilesFromDirectory cannot be resolved"
 * means the class is not on the build path — add the Mahout jar to the classpath
 * and import org.apache.mahout.text.SequenceFilesFromDirectory.
 */
public class TestSequenceFileConverter {
    /**
     * Entry point. Builds the argument array expected by
     * {@code SequenceFilesFromDirectory.main} and delegates to it.
     *
     * @param args ignored; input/output directories are hard-coded below
     * @throws Exception propagated from the tool, whose own {@code main} is
     *         declared {@code throws Exception} — omitting this clause is a
     *         compile error, which is one cause of the reported Eclipse markers
     */
    public static void main(String[] args) throws Exception {
        String inputDir = "inputDir";
        String outputDir = "outoutDir"; // NOTE(review): likely a typo for "outputDir" — confirm intended path
        // inputDir/outputDir are already Strings; no .toString() needed.
        SequenceFilesFromDirectory.main(new String[] {
            "--input", inputDir,
            "--output", outputDir,
            "--chunkSize", "64",
            "--charset", Charsets.UTF_8.name()});
    }
}
But Eclipse tells me that what I did was wrong, showing the following error message:
Multiple markers at this line
- Syntax error on token "main", = expected after this
token
- Syntax error on token(s), misplaced construct(s)
- SequenceFilesFromDirectory cannot be resolved
The following is how SequenceFilesFromDirectory is defined. The API link for SequenceFilesFromDirectory is http://search-lucene.com/jd/mahout/utils/org/apache/mahout/text/SequenceFilesFromDirectory.html
/**
* Converts a directory of text documents into SequenceFiles of Specified chunkSize. This class takes in a
* parent directory containing sub folders of text documents and recursively reads the files and creates the
* {@link SequenceFile}s of docid => content. The docid is set as the relative path of the document from the
* parent directory prepended with a specified prefix. You can also specify the input encoding of the text
* files. The content of the output SequenceFiles are encoded as UTF-8 text.
*/
public class SequenceFilesFromDirectory extends AbstractJob {

  private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectory.class);

  // Default path-filter implementation used when the caller does not supply one.
  private static final String PREFIX_ADDITION_FILTER = PrefixAdditionFilter.class.getName();

  // Each option is declared as {long name, short name}.
  public static final String[] CHUNK_SIZE_OPTION = {"chunkSize", "chunk"};
  public static final String[] FILE_FILTER_CLASS_OPTION = {"fileFilterClass", "filter"};
  public static final String[] KEY_PREFIX_OPTION = {"keyPrefix", "prefix"};
  public static final String[] CHARSET_OPTION = {"charset", "c"};

  /**
   * Walks {@code input} with the configured path filter, which writes chunked
   * SequenceFiles of docid =&gt; content to {@code output} as a side effect of
   * being applied by {@code FileSystem.listStatus}.
   *
   * @param conf      Hadoop configuration used to open the file system and writer
   * @param keyPrefix prefix prepended to each document key
   * @param options   parsed command-line options (chunk size, filter class, charset)
   * @param input     parent directory containing the text documents
   * @param output    destination directory for the SequenceFile chunks
   * @throws ClassNotFoundException if the configured filter class cannot be loaded
   * @throws NoSuchMethodException  if the filter lacks the expected constructor
   * @throws IOException            on any file-system failure
   */
  public void run(Configuration conf,
                  String keyPrefix,
                  Map<String, String> options,
                  Path input,
                  Path output)
    throws InstantiationException, IllegalAccessException, InvocationTargetException, IOException,
           NoSuchMethodException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(conf);
    ChunkedWriter writer = new ChunkedWriter(conf, Integer.parseInt(options.get(CHUNK_SIZE_OPTION[0])), output);
    try {
      SequenceFilesFromDirectoryFilter pathFilter;
      String fileFilterClassName = options.get(FILE_FILTER_CLASS_OPTION[0]);
      if (PrefixAdditionFilter.class.getName().equals(fileFilterClassName)) {
        pathFilter = new PrefixAdditionFilter(conf, keyPrefix, options, writer);
      } else {
        // Instantiate a user-supplied filter reflectively; it must expose a
        // (Configuration, String, Map, ChunkedWriter) constructor.
        Class<? extends SequenceFilesFromDirectoryFilter> pathFilterClass =
            Class.forName(fileFilterClassName).asSubclass(SequenceFilesFromDirectoryFilter.class);
        Constructor<? extends SequenceFilesFromDirectoryFilter> constructor =
            pathFilterClass.getConstructor(Configuration.class, String.class, Map.class, ChunkedWriter.class);
        pathFilter = constructor.newInstance(conf, keyPrefix, options, writer);
      }
      fs.listStatus(input, pathFilter);
    } finally {
      // Close in finally so buffered chunks are flushed and the underlying
      // stream is released even if filter construction or listing fails.
      // (Previously the writer leaked on any exception above.)
      writer.close();
    }
  }

  /** Command-line entry point; delegates option parsing to Hadoop's ToolRunner. */
  public static void main(String[] args) throws Exception {
    ToolRunner.run(new SequenceFilesFromDirectory(), args);
  }

  /*
   * Callback invoked by ToolRunner after Hadoop's generic parameters have
   * been processed.
   */
  @Override
  public int run(String[] args)
    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, NoSuchMethodException,
    InvocationTargetException {
    addOptions();
    // parseArguments returns null when parsing fails or --help was requested.
    if (parseArguments(args) == null) {
      return -1;
    }
    Map<String, String> options = parseOptions();
    Path input = getInputPath();
    Path output = getOutputPath();
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
      // Wipe any previous output so the job can be re-run in place.
      Configuration conf = new Configuration();
      HadoopUtil.delete(conf, output);
    }
    String keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    run(getConf(), keyPrefix, options, input, output);
    return 0;
  }

  /**
   * Override this method in order to add additional options to the command line of the SequenceFileFromDirectory job.
   * Do not forget to call super() otherwise all standard options (input/output dirs etc) will not be available.
   */
  protected void addOptions() {
    addInputOption();
    addOutputOption();
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(CHUNK_SIZE_OPTION[0], CHUNK_SIZE_OPTION[1], "The chunkSize in MegaBytes. Defaults to 64", "64");
    addOption(FILE_FILTER_CLASS_OPTION[0], FILE_FILTER_CLASS_OPTION[1],
      "The name of the class to use for file parsing. Default: " + PREFIX_ADDITION_FILTER, PREFIX_ADDITION_FILTER);
    addOption(KEY_PREFIX_OPTION[0], KEY_PREFIX_OPTION[1], "The prefix to be prepended to the key", "");
    addOption(CHARSET_OPTION[0], CHARSET_OPTION[1],
      "The name of the character encoding of the input files. Default to UTF-8", "UTF-8");
  }

  /**
   * Override this method in order to parse your additional options from the command line. Do not forget to call
   * super() otherwise standard options (input/output dirs etc) will not be available.
   *
   * @return a map from long option name to its parsed value
   */
  protected Map<String, String> parseOptions() throws IOException {
    Map<String, String> options = new HashMap<String, String>();
    options.put(CHUNK_SIZE_OPTION[0], getOption(CHUNK_SIZE_OPTION[0]));
    options.put(FILE_FILTER_CLASS_OPTION[0], getOption(FILE_FILTER_CLASS_OPTION[0]));
    options.put(CHARSET_OPTION[0], getOption(CHARSET_OPTION[0]));
    return options;
  }
}