import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
/**
 * Analyzer that cuts text into tokens with a regular-expression delimiter and
 * then lower-cases every token.
 *
 * <p>The analysis chain built by {@link #createComponents(String)} is:
 * {@link PatternTokenizer} (splitting on the delimiter pattern) followed by
 * {@link LowerCaseFilter}.
 *
 * <p>NOTE(review): this class does not override {@code normalize(...)}; whether
 * query-time term normalization needs the same lower-casing depends on the
 * Lucene version in use — confirm.
 */
public final class JAFDefaultAnalyzer extends Analyzer {

    /**
     * Delimiter expression. A token boundary is either
     * <ul>
     *   <li>a run of one or more whitespace characters, or</li>
     *   <li>a single one of {@code ) } ] . , ; : ! ? "} that is immediately
     *       followed by whitespace or by the end of a line
     *       ({@link Pattern#MULTILINE} lets {@code $} match at line ends).</li>
     * </ul>
     * Punctuation that is not followed by whitespace or a line end is therefore
     * left inside the token.
     */
    private static final Pattern TOKEN_DELIMITER =
            Pattern.compile("\\s+|[\\)\\}\\].,;:!?\"](\\s+|$)", Pattern.MULTILINE);

    /**
     * Group index passed to {@link PatternTokenizer}; {@code -1} makes the
     * tokenizer treat pattern matches as separators instead of as tokens.
     */
    private static final int SPLIT_ON_MATCH = -1;

    /**
     * Exposes the delimiter pattern used by this analyzer.
     *
     * @return the shared, pre-compiled delimiter pattern
     */
    public static Pattern getPattern() {
        return TOKEN_DELIMITER;
    }

    /**
     * Builds the tokenizer/filter chain for a field.
     *
     * @param fieldName name of the field being analyzed; not consulted, the same
     *                  chain is produced for every field
     * @return components whose source is the pattern tokenizer and whose result
     *         stream is that tokenizer wrapped in a lower-casing filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        final Tokenizer source = new PatternTokenizer(TOKEN_DELIMITER, SPLIT_ON_MATCH);
        return new TokenStreamComponents(source, new LowerCaseFilter(source));
    }
}