1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.index.updater;
20
21 import java.io.BufferedOutputStream;
22 import java.io.DataOutput;
23 import java.io.DataOutputStream;
24 import java.io.IOException;
25 import java.io.OutputStream;
26 import java.util.ArrayList;
27 import java.util.Date;
28 import java.util.List;
29 import java.util.zip.GZIPOutputStream;
30
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.Field;
33 import org.apache.lucene.document.StoredField;
34 import org.apache.lucene.index.IndexOptions;
35 import org.apache.lucene.index.IndexReader;
36 import org.apache.lucene.index.IndexableField;
37 import org.apache.lucene.index.MultiBits;
38 import org.apache.lucene.util.Bits;
39 import org.apache.maven.index.ArtifactInfo;
40 import org.apache.maven.index.IndexerField;
41 import org.apache.maven.index.context.DefaultIndexingContext;
42 import org.apache.maven.index.context.IndexingContext;
43
44
45
46
47
48
49 public class IndexDataWriter {
50 static final int VERSION = 1;
51
52 static final int F_INDEXED = 1;
53
54 static final int F_TOKENIZED = 2;
55
56 static final int F_STORED = 4;
57
58 static final int F_COMPRESSED = 8;
59
60 private final DataOutputStream dos;
61
62 private final GZIPOutputStream gos;
63
64 private final BufferedOutputStream bos;
65
66 private boolean descriptorWritten;
67
68 public IndexDataWriter(OutputStream os) throws IOException {
69 bos = new BufferedOutputStream(os, 1024 * 8);
70 gos = new GZIPOutputStream(bos, 1024 * 2);
71 dos = new DataOutputStream(gos);
72
73 this.descriptorWritten = false;
74 }
75
76 public int write(IndexingContext context, IndexReader indexReader, List<Integer> docIndexes) throws IOException {
77 writeHeader(context);
78
79 int n = writeDocuments(indexReader, docIndexes);
80
81 writeGroupFields(context);
82
83 close();
84
85 return n;
86 }
87
88 public void close() throws IOException {
89 dos.flush();
90
91 gos.flush();
92 gos.finish();
93
94 bos.flush();
95 }
96
97 public void writeHeader(IndexingContext context) throws IOException {
98 dos.writeByte(VERSION);
99
100 Date timestamp = context.getTimestamp();
101 dos.writeLong(timestamp == null ? -1 : timestamp.getTime());
102 }
103
104 public void writeGroupFields(IndexingContext context) throws IOException {
105 {
106 List<IndexableField> allGroupsFields = new ArrayList<>(2);
107 allGroupsFields.add(
108 new Field(ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, IndexerField.KEYWORD_STORED));
109 allGroupsFields.add(new StoredField(
110 ArtifactInfo.ALL_GROUPS_LIST,
111 ArtifactInfo.lst2str(context.getAllGroups()),
112 IndexerField.KEYWORD_STORED));
113 writeDocumentFields(allGroupsFields);
114 }
115
116 {
117 List<IndexableField> rootGroupsFields = new ArrayList<>(2);
118 rootGroupsFields.add(
119 new Field(ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, IndexerField.KEYWORD_STORED));
120 rootGroupsFields.add(new StoredField(
121 ArtifactInfo.ROOT_GROUPS_LIST,
122 ArtifactInfo.lst2str(context.getRootGroups()),
123 IndexerField.KEYWORD_STORED));
124 writeDocumentFields(rootGroupsFields);
125 }
126 }
127
128 public int writeDocuments(IndexReader r, List<Integer> docIndexes) throws IOException {
129 int n = 0;
130 Bits liveDocs = MultiBits.getLiveDocs(r);
131
132 if (docIndexes == null) {
133 for (int i = 0; i < r.maxDoc(); i++) {
134 if (liveDocs == null || liveDocs.get(i)) {
135 if (writeDocument(r.document(i))) {
136 n++;
137 }
138 }
139 }
140 } else {
141 for (int i : docIndexes) {
142 if (liveDocs == null || liveDocs.get(i)) {
143 if (writeDocument(r.document(i))) {
144 n++;
145 }
146 }
147 }
148 }
149
150 return n;
151 }
152
153 public boolean writeDocument(final Document document) throws IOException {
154 List<IndexableField> fields = document.getFields();
155
156 List<IndexableField> storedFields = new ArrayList<>(fields.size());
157
158 for (IndexableField field : fields) {
159 if (DefaultIndexingContext.FLD_DESCRIPTOR.equals(field.name())) {
160 if (descriptorWritten) {
161 return false;
162 } else {
163 descriptorWritten = true;
164 }
165 }
166
167 if (field.fieldType().stored()) {
168 storedFields.add(field);
169 }
170 }
171
172 writeDocumentFields(storedFields);
173
174 return true;
175 }
176
177 public void writeDocumentFields(List<IndexableField> fields) throws IOException {
178 dos.writeInt(fields.size());
179
180 for (IndexableField field : fields) {
181 writeField(field);
182 }
183 }
184
185 public void writeField(IndexableField field) throws IOException {
186 int flags = (field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0)
187 + (field.fieldType().tokenized() ? F_TOKENIZED : 0)
188 + (field.fieldType().stored() ? F_STORED : 0);
189
190
191 String name = field.name();
192 String value = field.stringValue();
193
194 dos.write(flags);
195 dos.writeUTF(name);
196 writeUTF(value, dos);
197 }
198
199 private static void writeUTF(String str, DataOutput out) throws IOException {
200 int strlen = str.length();
201 int utflen = 0;
202 int c;
203
204
205 for (int i = 0; i < strlen; i++) {
206 c = str.charAt(i);
207 if ((c >= 0x0001) && (c <= 0x007F)) {
208 utflen++;
209 } else if (c > 0x07FF) {
210 utflen += 3;
211 } else {
212 utflen += 2;
213 }
214 }
215
216
217 out.writeInt(utflen);
218
219 byte[] bytearr = new byte[utflen];
220
221 int count = 0;
222
223 int i = 0;
224 for (; i < strlen; i++) {
225 c = str.charAt(i);
226 if (!((c >= 0x0001) && (c <= 0x007F))) {
227 break;
228 }
229 bytearr[count++] = (byte) c;
230 }
231
232 for (; i < strlen; i++) {
233 c = str.charAt(i);
234 if ((c >= 0x0001) && (c <= 0x007F)) {
235 bytearr[count++] = (byte) c;
236
237 } else if (c > 0x07FF) {
238 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
239 bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
240 bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
241 } else {
242 bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
243 bytearr[count++] = (byte) (0x80 | ((c) & 0x3F));
244 }
245 }
246
247 out.write(bytearr, 0, utflen);
248 }
249 }