/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.maven.index;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;
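
/**
 * Default implementation of {@link IteratorResultSet}: it lazily turns Lucene {@link TopDocs} hits into
 * {@link ArtifactInfo} instances as the caller iterates, applying the optional filter, postprocessor and
 * match-highlight requests from the originating {@link IteratorSearchRequest}, and releases the underlying
 * searcher once iteration is exhausted or {@link #close()} is called.
 *
 * <p>A minimal usage sketch (instances are normally obtained from an {@link IteratorSearchResponse}, not
 * constructed directly; {@code indexer} below stands for any configured {@link Indexer}):</p>
 *
 * <pre>{@code
 * try (IteratorSearchResponse response = indexer.searchIterator(request)) {
 *     for (ArtifactInfo ai : response.getResults()) {
 *         System.out.println(ai);
 *     }
 * }
 * }</pre>
 */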
public class DefaultIteratorResultSet implements IteratorResultSet {
    private final IteratorSearchRequest searchRequest;

    private final NexusIndexMultiSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    private final int maxRecPointer;

    private int pointer;

    private int processedArtifactInfoCount;

    private ArtifactInfo ai;
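
    /**
     * Creates a result set over the given hits. The constructor precomputes the per-context document
     * offsets ({@code starts}) so every hit can be mapped back to the {@link IndexingContext} it came
     * from, and rewrites the match-highlight queries against the searcher's reader so they are usable
     * for highlighting later on.
     */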
    protected DefaultIteratorResultSet(
            final IteratorSearchRequest request,
            final NexusIndexMultiSearcher indexSearcher,
            final List<IndexingContext> contexts,
            final TopDocs hits)
            throws IOException {
        this.searchRequest = request;

        this.indexSearcher = indexSearcher;

        this.contexts = contexts;

        {
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1];

            // compute the "starts" offsets: starts[i] is the first global doc number of context i,
            // starts[contexts.size()] is the total document count across all acquired searchers
            final List<IndexSearcher> acquiredSearchers =
                    indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
            for (int i = 0; i < contexts.size(); i++) {
                starts[i] = maxDoc;
                maxDoc += acquiredSearchers.get(i).getIndexReader().maxDoc();
            }
            starts[contexts.size()] = maxDoc;
        }

        this.filter = request.getArtifactInfoFilter();

        this.postprocessor = request.getArtifactInfoPostprocessor();

        // rewrite the highlight queries against this searcher's reader; the rewritten requests (not the
        // raw ones from the request) are what highlighting will use
        this.matchHighlightRequests = new ArrayList<>(request.getMatchHighlightRequests().size());
        for (MatchHighlightRequest hr : request.getMatchHighlightRequests()) {
            Query rewrittenQuery = hr.getQuery().rewrite(indexSearcher.getIndexReader());
            this.matchHighlightRequests.add(
                    new MatchHighlightRequest(hr.getField(), rewrittenQuery, hr.getHighlightMode()));
        }

        this.hits = hits;

        this.from = request.getStart();

        this.count = (request.getCount() == AbstractSearchRequest.UNDEFINED
                ? hits.scoreDocs.length
                : Math.min(request.getCount(), hits.scoreDocs.length));

        this.pointer = from;

        this.processedArtifactInfoCount = 0;

        this.maxRecPointer = from + count;

        ai = createNextAi();

        if (ai == null) {
            cleanUp();
        }
    }

    public boolean hasNext() {
        return ai != null;
    }

    public ArtifactInfo next() {
        ArtifactInfo result = ai;

        try {
            ai = createNextAi();
        } catch (IOException e) {
            ai = null;

            throw new IllegalStateException("Cannot fetch next ArtifactInfo!", e);
        } finally {
            if (ai == null) {
                cleanUp();
            }
        }

        return result;
    }

    public void remove() {
        throw new UnsupportedOperationException(
                "Method not supported on " + getClass().getName());
    }

    public Iterator<ArtifactInfo> iterator() {
        return this;
    }

    public void close() {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount() {
        return processedArtifactInfoCount;
    }

    @Override
    public void finalize() throws Throwable {
        super.finalize();

        if (!cleanedUp) {
            System.err.println("#WARNING: Lock leaking from " + getClass().getName() + " for query "
                    + searchRequest.getQuery().toString());

            cleanUp();
        }
    }
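
    /**
     * Advances the pointer until the next {@link ArtifactInfo} that survives the optional filter can be
     * constructed, or until the requested page ({@code maxRecPointer}) or the available hits run out.
     * Returns {@code null} when there is nothing more to return.
     */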
    protected ArtifactInfo createNextAi() throws IOException {
        ArtifactInfo result = null;

        // scan forward until we either construct an ArtifactInfo that passes the filter, step over the
        // requested page (maxRecPointer), or run out of hits
        while ((result == null) && (pointer < maxRecPointer) && (pointer < hits.scoreDocs.length)) {
            Document doc = indexSearcher.doc(hits.scoreDocs[pointer].doc);

            IndexingContext context = getIndexingContextForPointer(doc, hits.scoreDocs[pointer].doc);

            result = IndexUtils.constructArtifactInfo(doc, context);

            if (result != null) {
                // attach the Lucene explanation only when explicitly requested
                if (searchRequest.isLuceneExplain()) {
                    result.getAttributes()
                            .put(
                                    Explanation.class.getName(),
                                    indexSearcher
                                            .explain(searchRequest.getQuery(), hits.scoreDocs[pointer].doc)
                                            .toString());
                }

                result.setLuceneScore(hits.scoreDocs[pointer].score);

                result.setRepository(context.getRepositoryId());

                result.setContext(context.getId());

                if (filter != null) {
                    if (!filter.accepts(context, result)) {
                        result = null;
                    }
                }

                if (result != null && postprocessor != null) {
                    postprocessor.postprocess(context, result);
                }

                if (result != null && !matchHighlightRequests.isEmpty()) {
                    calculateHighlights(context, doc, result);
                }
            }

            pointer++;
            processedArtifactInfoCount++;
        }

        return result;
    }

    private volatile boolean cleanedUp = false;
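
    /**
     * Releases the underlying multi-searcher exactly once; subsequent calls are no-ops.
     */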
    protected synchronized void cleanUp() {
        if (cleanedUp) {
            return;
        }

        try {
            indexSearcher.release();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }

        this.cleanedUp = true;
    }
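
    /**
     * Calculates the match highlights for a single hit and adds them to the {@link ArtifactInfo}: one
     * {@link MatchHighlight} per highlight request whose field is stored and present on the document.
     */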
    protected void calculateHighlights(IndexingContext context, Document d, ArtifactInfo ai) throws IOException {
        IndexerField field;

        String text;

        List<String> highlightFragment;

        for (MatchHighlightRequest hr : matchHighlightRequests) {
            field = selectStoredIndexerField(hr.getField());

            if (field != null) {
                text = ai.getFieldValue(field.getOntology());

                if (text != null) {
                    highlightFragment = highlightField(context, hr, field, text);

                    if (highlightFragment != null && !highlightFragment.isEmpty()) {
                        MatchHighlight matchHighlight = new MatchHighlight(hr.getField(), highlightFragment);

                        ai.getMatchHighlights().add(matchHighlight);
                    }
                }
            }
        }
    }
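
    /**
     * Selects a stored {@link IndexerField} for the given ontology field, since highlighting needs the
     * stored text. Returns {@code null} if the field is backed by no indexer fields at all.
     */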
    protected IndexerField selectStoredIndexerField(Field field) {
        // classnames are special-cased: their stored variant lives on the JAR contents creator
        if (MAVEN.CLASSNAMES.equals(field)) {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        } else {
            return field.getIndexerFields().isEmpty()
                    ? null
                    : field.getIndexerFields().iterator().next();
        }
    }
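
    /**
     * Produces the highlight fragments for one field of one hit: the stored text is re-analyzed with the
     * context's analyzer and run through a Lucene {@link Highlighter}. Only
     * {@link MatchHighlightMode#HTML} is currently supported.
     */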
    protected List<String> highlightField(
            IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException {
        // classnames are stored as "/"-separated paths; turn them back into dotted class names
        if (MAVEN.CLASSNAMES.equals(field.getOntology())) {
            text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n");
        }

        Analyzer analyzer = context.getAnalyzer();
        TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text));

        CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream);

        Formatter formatter;

        if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) {
            formatter = new SimpleHTMLFormatter();
        } else {
            tokenStream.reset();
            tokenStream.end();
            tokenStream.close();
            throw new UnsupportedOperationException(
                    "Highlight mode \"" + hr.getHighlightMode().toString() + "\" is not supported!");
        }

        return getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3);
    }
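
    /**
     * Runs the Lucene {@link Highlighter} over the token stream and returns up to {@code maxNumFragments}
     * scoring fragments as plain strings; token-offset problems simply yield fewer (possibly zero) fragments.
     */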
    protected final List<String> getBestFragments(
            Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments)
            throws IOException {
        Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));

        highlighter.setTextFragmenter(new OneLineFragmenter());

        maxNumFragments = Math.max(1, maxNumFragments);

        TextFragment[] frag;

        ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);

        try {
            frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);

            for (TextFragment textFragment : frag) {
                if ((textFragment != null) && (textFragment.getScore() > 0)) {
                    fragTexts.add(textFragment.toString());
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            // ignore: just return whatever fragments were collected so far
        }

        return fragTexts;
    }

    protected IndexingContext getIndexingContextForPointer(Document doc, int docPtr) {
        return contexts.get(readerIndex(docPtr, this.starts, this.contexts.size()));
    }
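
    /**
     * Maps a global document number to the index of the sub-searcher (and hence the {@link IndexingContext})
     * it belongs to, via a binary search over the {@code starts} offsets computed in the constructor
     * (the same routing approach Lucene uses for its composite readers).
     */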
    private static int readerIndex(int n, int[] starts, int numSubReaders) {
        int lo = 0;
        int hi = numSubReaders - 1;

        while (hi >= lo) {
            int mid = (lo + hi) >>> 1;
            int midValue = starts[mid];
            if (n < midValue) {
                hi = mid - 1;
            } else if (n > midValue) {
                lo = mid + 1;
            } else {
                // scan to the last reader sharing this start value (empty sub-readers share offsets)
                while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) {
                    mid++;
                }
                return mid;
            }
        }
        return hi;
    }
}