View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.context;
20  
21  import org.apache.lucene.analysis.Analyzer;
22  import org.apache.lucene.analysis.AnalyzerWrapper;
23  import org.apache.lucene.analysis.LowerCaseFilter;
24  import org.apache.lucene.analysis.Tokenizer;
25  import org.apache.lucene.analysis.util.CharTokenizer;
26  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
27  
28  /**
29   * A Nexus specific analyzer. Only difference from Lucene's SimpleAnalyzer is that we use LetterOrDigitTokenizer instead
30   * of LowerCaseTokenizer. LetterOrDigitTokenizer does pretty much the same as LowerCaseTokenizer, it normalizes to lower
31   * case letter, but it takes letters and numbers too (as opposed to LowerCaseTokenizer) as token chars.
32   *
33   * @author Eugene Kuleshov
34   * @author cstamas
35   */
36  public final class NexusAnalyzer extends AnalyzerWrapper {
37      private static final Analyzer CLASS_NAMES_ANALYZER = new Analyzer() {
38          @Override
39          protected TokenStreamComponents createComponents(String fieldName) {
40              final Tokenizer tokenizer = new DeprecatedClassnamesTokenizer();
41              return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
42          }
43      };
44  
45      private static final Analyzer LETTER_OR_DIGIT_ANALYZER = new Analyzer() {
46          @Override
47          protected TokenStreamComponents createComponents(String filedName) {
48              final Tokenizer tokenizer = new LetterOrDigitTokenizer();
49              return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
50          }
51      };
52  
53      public NexusAnalyzer() {
54          super(PER_FIELD_REUSE_STRATEGY);
55      }
56  
57      @Override
58      protected Analyzer getWrappedAnalyzer(String fieldName) {
59          if (JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals(fieldName)) {
60              // To keep "backward" compatibility, we have to use old flawed tokenizer.
61              return CLASS_NAMES_ANALYZER;
62          } else {
63              return LETTER_OR_DIGIT_ANALYZER;
64          }
65      }
66  
67      // ==
68  
69      public static class NoopTokenizer extends CharTokenizer {
70          public NoopTokenizer() {
71              super();
72          }
73  
74          @Override
75          protected boolean isTokenChar(int i) {
76              return true;
77          }
78      }
79  
80      @Deprecated
81      public static class DeprecatedClassnamesTokenizer extends CharTokenizer {
82          public DeprecatedClassnamesTokenizer() {
83              super();
84          }
85  
86          @Override
87          protected boolean isTokenChar(int i) {
88              return i != '\n';
89          }
90      }
91  
92      public static class LetterOrDigitTokenizer extends CharTokenizer {
93          public LetterOrDigitTokenizer() {
94              super();
95          }
96  
97          @Override
98          protected boolean isTokenChar(int c) {
99              return Character.isLetterOrDigit(c);
100         }
101     }
102 }