eclipse/eclipse-lucene-4.patch

449 lines
18 KiB
Diff

diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
index 4e6786c..49dc51b 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
@@ -10,10 +10,14 @@
*******************************************************************************/
package org.eclipse.help.internal.search;
import java.io.*;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.en.*;
+import org.apache.lucene.analysis.core.*;
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.util.Version;
/**
* Lucene Analyzer for English. LowerCaseTokenizer->StopFilter->PorterStemFilter
*/
@@ -25,18 +29,22 @@ public final class Analyzer_en extends Analyzer {
super();
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided
+ * Creates a TokenStreamComponents which tokenizes all the text in the provided
* Reader.
*/
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new PorterStemFilter(new StopFilter(false, new LowerCaseAndDigitsTokenizer(reader), getStopWords(), false));
+ @Override
+ public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ CharArraySet stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_47, getStopWords(), true);
+ Tokenizer source = new LowerCaseAndDigitsTokenizer(reader);
+ TokenStream filter = new PorterStemFilter(new StopFilter(Version.LUCENE_47, source, stopWordsSet));
+ return new TokenStreamComponents(source, filter);
}
- private Set<String> stopWords;
+ private List<String> stopWords;
- private Set<String> getStopWords() {
+ private List<String> getStopWords() {
if ( stopWords == null ) {
- stopWords = new HashSet<String>();
+ stopWords = new ArrayList<String>();
for (int i = 0; i < STOP_WORDS.length; i++) {
stopWords.add(STOP_WORDS[i]);
}
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index 50258d2..f72615f 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -15,8 +15,10 @@ import java.util.StringTokenizer;
import com.ibm.icu.text.BreakIterator;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
import org.eclipse.help.internal.base.HelpBasePlugin;
@@ -82,11 +84,14 @@ public final class DefaultAnalyzer extends Analyzer {
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided
+ * Creates a TokenStreamComponents which tokenizes all the text in the provided
* Reader.
*/
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale));
+ @Override
+ public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer source = new WordTokenStream(fieldName, reader, locale);
+ LowerCaseFilter filter = new LowerCaseFilter(Version.LUCENE_47, source);
+ return new TokenStreamComponents(source, filter);
}
/**
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
index 0dd3943..d101ae9 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
@@ -12,7 +12,8 @@ package org.eclipse.help.internal.search;
import java.io.*;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.util.*;
+import org.apache.lucene.util.Version;
/**
* Tokenizer breaking words around letters or digits.
@@ -20,13 +21,14 @@ import org.apache.lucene.analysis.*;
public class LowerCaseAndDigitsTokenizer extends CharTokenizer {
public LowerCaseAndDigitsTokenizer(Reader input) {
- super(input);
+ super(Version.LUCENE_47, input);
}
protected char normalize(char c) {
return Character.toLowerCase(c);
}
- protected boolean isTokenChar(char c) {
+ @Override
+ public boolean isTokenChar(int c) {
return Character.isLetterOrDigit(c);
}
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
index 00c2799..9f9962d 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
@@ -243,16 +243,24 @@ public class QueryBuilder {
private List<String> analyzeText(Analyzer analyzer, String fieldName, String text) {
List<String> words = new ArrayList<String>(1);
Reader reader = new StringReader(text);
- TokenStream tStream = analyzer.tokenStream(fieldName, reader);
- CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class);
+ TokenStream tStream = null;
try {
+ tStream = analyzer.tokenStream(fieldName, reader);
+ tStream.reset();
+ CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class);
while (tStream.incrementToken()) {
String term = termAttribute.toString();
words.add(term);
}
- reader.close();
} catch (IOException ioe) {
+ } finally {
+ if (tStream != null) {
+ try {
+ tStream.close();
+ } catch (IOException e) {
+ }
+ }
}
return words;
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
index e860c8d..0d5a8b6 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
@@ -31,13 +31,20 @@ import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -265,10 +272,10 @@ public class SearchIndex implements IHelpSearchIndex {
if (iw != null) {
iw.close();
}
- boolean create = false;
+ OpenMode create = OpenMode.APPEND;
if (!indexDir.exists() || !isLuceneCompatible() || !isAnalyzerCompatible()
|| inconsistencyFile.exists() && firstOperation) {
- create = true;
+ create = OpenMode.CREATE;
indexDir.mkdirs();
if (!indexDir.exists())
return false; // unable to setup index directory
@@ -276,9 +283,13 @@ public class SearchIndex implements IHelpSearchIndex {
indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir);
indexedDocs.restore();
setInconsistent(true);
- MaxFieldLength max = new MaxFieldLength(1000000);
- iw = new IndexWriter(luceneDirectory, analyzerDescriptor.getAnalyzer(), create, max);
- iw.setMergeFactor(20);
+ Analyzer wrapper = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 1000000);
+ IndexWriterConfig iconfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_47, wrapper);
+ iconfig.setOpenMode(create);
+ LogDocMergePolicy policy = new LogDocMergePolicy();
+ policy.setMergeFactor(20);
+ iconfig.setMergePolicy(policy);
+ iw = new IndexWriter(luceneDirectory, iconfig);
return true;
} catch (IOException e) {
HelpBasePlugin.logError("Exception occurred in search indexing at beginAddBatch.", e); //$NON-NLS-1$
@@ -297,7 +308,7 @@ public class SearchIndex implements IHelpSearchIndex {
indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir);
indexedDocs.restore();
setInconsistent(true);
- ir = IndexReader.open(luceneDirectory, false);
+ ir = DirectoryReader.open(luceneDirectory);
return true;
} catch (IOException e) {
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
@@ -313,7 +324,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (ir != null) {
ir.close();
}
- ir = IndexReader.open(luceneDirectory, false);
+ ir = DirectoryReader.open(luceneDirectory);
return true;
} catch (IOException e) {
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
@@ -331,7 +342,7 @@ public class SearchIndex implements IHelpSearchIndex {
public IStatus removeDocument(String name) {
Term term = new Term(FIELD_NAME, name);
try {
- ir.deleteDocuments(term);
+ iw.deleteDocuments(term);
indexedDocs.remove(name);
} catch (IOException e) {
return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
@@ -350,7 +361,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (iw == null)
return false;
if (optimize)
- iw.optimize();
+ iw.forceMerge(1);
iw.close();
iw = null;
// save the update info:
@@ -369,7 +380,7 @@ public class SearchIndex implements IHelpSearchIndex {
* know about this change. Close it so that it gets reloaded next search.
*/
if (searcher != null) {
- searcher.close();
+ searcher.getIndexReader().close();
searcher = null;
}
return true;
@@ -401,7 +412,7 @@ public class SearchIndex implements IHelpSearchIndex {
* know about this change. Close it so that it gets reloaded next search.
*/
if (searcher != null) {
- searcher.close();
+ searcher.getIndexReader().close();
searcher = null;
}
return true;
@@ -505,8 +516,8 @@ public class SearchIndex implements IHelpSearchIndex {
}
Directory[] luceneDirs = dirList.toArray(new Directory[dirList.size()]);
try {
- iw.addIndexesNoOptimize(luceneDirs);
- iw.optimize();
+ iw.addIndexes(luceneDirs);
+ iw.forceMerge(1);
} catch (IOException ioe) {
HelpBasePlugin.logError("Merging search indexes failed.", ioe); //$NON-NLS-1$
return new HashMap<String, String[]>();
@@ -515,18 +526,19 @@ public class SearchIndex implements IHelpSearchIndex {
}
public IStatus removeDuplicates(String name, String[] index_paths) {
- TermDocs hrefDocs = null;
- TermDocs indexDocs = null;
- Term hrefTerm = new Term(FIELD_NAME, name);
try {
+ AtomicReader ar = SlowCompositeReaderWrapper.wrap(ir);
+ DocsEnum hrefDocs = null;
+ DocsEnum indexDocs = null;
+ Term hrefTerm = new Term(FIELD_NAME, name);
for (int i = 0; i < index_paths.length; i++) {
Term indexTerm = new Term(FIELD_INDEX_ID, index_paths[i]);
if (i == 0) {
- hrefDocs = ir.termDocs(hrefTerm);
- indexDocs = ir.termDocs(indexTerm);
+ hrefDocs = ar.termDocsEnum(hrefTerm);
+ indexDocs = ar.termDocsEnum(indexTerm);
} else {
- hrefDocs.seek(hrefTerm);
- indexDocs.seek(indexTerm);
+ hrefDocs = ar.termDocsEnum(hrefTerm);
+ indexDocs = ar.termDocsEnum(indexTerm);
}
removeDocuments(hrefDocs, indexDocs);
}
@@ -535,19 +547,6 @@ public class SearchIndex implements IHelpSearchIndex {
"IO exception occurred while removing duplicates of document " + name //$NON-NLS-1$
+ " from index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$
ioe);
- } finally {
- if (hrefDocs != null) {
- try {
- hrefDocs.close();
- } catch (IOException e) {
- }
- }
- if (indexDocs != null) {
- try {
- indexDocs.close();
- } catch (IOException e) {
- }
- }
}
return Status.OK_STATUS;
}
@@ -559,33 +558,33 @@ public class SearchIndex implements IHelpSearchIndex {
* @param docs2
* @throws IOException
*/
- private void removeDocuments(TermDocs doc1, TermDocs docs2) throws IOException {
- if (!doc1.next()) {
+ private void removeDocuments(DocsEnum doc1, DocsEnum docs2) throws IOException {
+ if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
- if (!docs2.next()) {
+ if (docs2.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
while (true) {
- if (doc1.doc() < docs2.doc()) {
- if (!doc1.skipTo(docs2.doc())) {
- if (!doc1.next()) {
+ if (doc1.docID() < docs2.docID()) {
+ if (doc1.advance(docs2.docID()) == DocsEnum.NO_MORE_DOCS) {
+ if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
}
- } else if (doc1.doc() > docs2.doc()) {
- if (!docs2.skipTo(doc1.doc())) {
- if (!doc1.next()) {
+ } else if (doc1.docID() > docs2.docID()) {
+ if (docs2.advance(doc1.docID()) == DocsEnum.NO_MORE_DOCS) {
+ if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
}
}
- if (doc1.doc() == docs2.doc()) {
- ir.deleteDocument(doc1.doc());
- if (!doc1.next()) {
+ if (doc1.docID() == docs2.docID()) {
+ iw.tryDeleteDocument(ir, doc1.docID());
+ if (doc1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
- if (!docs2.next()) {
+ if (docs2.nextDoc() == DocsEnum.NO_MORE_DOCS) {
return;
}
}
@@ -792,7 +791,7 @@ public class SearchIndex implements IHelpSearchIndex {
public void openSearcher() throws IOException {
synchronized (searcherCreateLock) {
if (searcher == null) {
- searcher = new IndexSearcher(luceneDirectory, false);
+ searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
}
}
}
@@ -814,7 +813,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (searches.isEmpty()) {
if (searcher != null) {
try {
- searcher.close();
+ searcher.getIndexReader().close();
} catch (IOException ioe) {
}
}
@@ -888,9 +887,11 @@ public class SearchIndex implements IHelpSearchIndex {
*/
private void cleanOldIndex() {
IndexWriter cleaner = null;
- MaxFieldLength max = new MaxFieldLength(10000);
try {
- cleaner = new IndexWriter(luceneDirectory, analyzerDescriptor.getAnalyzer(), true, max);
+ Analyzer wrapper = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 10000);
+ IndexWriterConfig iconfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_47, wrapper);
+ iconfig.setOpenMode(OpenMode.CREATE);
+ cleaner = new IndexWriter(luceneDirectory, iconfig);
} catch (IOException ioe) {
} finally {
try {
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
index d0a7bb7..df31d89 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
@@ -18,7 +18,7 @@ import org.apache.lucene.analysis.*;
* Smart Analyzer. Chooses underlying implementation based on the field which
* text is analyzed.
*/
-public final class SmartAnalyzer extends Analyzer {
+public final class SmartAnalyzer extends AnalyzerWrapper {
Analyzer pluggedInAnalyzer;
Analyzer exactAnalyzer;
@@ -31,14 +31,14 @@ public final class SmartAnalyzer extends Analyzer {
this.exactAnalyzer = new DefaultAnalyzer(locale);
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided
- * Reader. Delegates to DefaultAnalyzer when field used to search for exact
+ * Delegates to DefaultAnalyzer when field used to search for exact
* match, and to plugged-in analyzer for other fields.
*/
- public final TokenStream tokenStream(String fieldName, Reader reader) {
+ @Override
+ public final Analyzer getWrappedAnalyzer(String fieldName) {
if (fieldName != null && fieldName.startsWith("exact_")) { //$NON-NLS-1$
- return exactAnalyzer.tokenStream(fieldName, reader);
+ return exactAnalyzer;
}
- return pluggedInAnalyzer.tokenStream(fieldName, reader);
+ return pluggedInAnalyzer;
}
}
diff --git a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
index 1360599..f622417 100644
--- a/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
+++ b/eclipse.platform.ua/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
@@ -35,6 +35,7 @@ public final class WordTokenStream extends Tokenizer {
* Constructor
*/
public WordTokenStream(String fieldName, Reader reader, Locale locale) {
+ super(reader);
this.reader = reader;
boundary = BreakIterator.getWordInstance(locale);
@@ -105,6 +106,7 @@ public final class WordTokenStream extends Tokenizer {
}
public void close() throws IOException {
+ super.close();
/// Unlikely to be called as this is a reused
if (this.reader != null) {
this.reader.close();