From e9b5adb1d0477a50111afe5cf7736c542b7e4998 Mon Sep 17 00:00:00 2001
From: Sopot Cela <scela@redhat.com>
Date: Thu, 21 May 2015 13:48:41 +0200
Subject: [PATCH] Bug 466829 - Upgrade platform.ua to Lucene 5.1.0

Change-Id: I882188205c2c1e2cc1106108680dd4e94570a975
Signed-off-by: Sopot Cela <scela@redhat.com>
---
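Note (illustrative only, not part of the commit): the core API change this patch tracks is that Lucene 3.x analyzers overrode tokenStream(String, Reader), while Lucene 5.x analyzers override createComponents(String) and return a TokenStreamComponents chain; tokenizers no longer take a Version or Reader in their constructors. Below is a minimal sketch of the 5.x pattern, assuming lucene-core and lucene-analyzers-common 5.1 on the classpath (ExampleAnalyzer and the chosen tokenizer/filter are illustrative, not classes touched by this patch):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

public final class ExampleAnalyzer extends Analyzer {
	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		// Lucene 5.x: tokenizers are constructed without a Reader or Version;
		// the framework calls setReader() when the analyzer is used.
		Tokenizer source = new WhitespaceTokenizer();
		// Filters wrap the tokenizer; the whole chain is returned as TokenStreamComponents.
		TokenStream result = new LowerCaseFilter(source);
		return new TokenStreamComponents(source, result);
	}
}

The hunks for Analyzer_en, DefaultAnalyzer and LowerCaseAndDigitsTokenizer below apply this same pattern.
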
diff --git a/org.eclipse.help.base/.settings/org.eclipse.jdt.core.prefs b/org.eclipse.help.base/.settings/org.eclipse.jdt.core.prefs
index 34d2b77..07713d8 100644
--- a/org.eclipse.help.base/.settings/org.eclipse.jdt.core.prefs
+++ b/org.eclipse.help.base/.settings/org.eclipse.jdt.core.prefs
@@ -7,9 +7,9 @@
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
@@ -98,7 +98,7 @@
org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
-org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.compiler.source=1.7
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
diff --git a/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF
index ee34c8e..e4bd703 100644
--- a/org.eclipse.help.base/META-INF/MANIFEST.MF
+++ b/org.eclipse.help.base/META-INF/MANIFEST.MF
@@ -43,9 +43,11 @@
org.eclipse.core.runtime;bundle-version="[3.11.0,4.0.0)",
org.eclipse.help;bundle-version="[3.5.0,4.0.0)";visibility:=reexport,
org.eclipse.core.expressions;bundle-version="[3.4.200,4.0.0)",
- org.apache.lucene.analysis;bundle-version="[3.5.0,4.0.0)",
- org.apache.lucene.core;bundle-version="[3.5.0,4.0.0)",
- org.eclipse.core.net;bundle-version="1.2.200"
+ org.eclipse.core.net;bundle-version="1.2.200",
+ org.apache.lucene.analyzers-common;bundle-version="5.1.0",
+ org.apache.lucene.core;bundle-version="5.1.0",
+ org.apache.lucene.queryparser;bundle-version="5.1.0",
+ org.apache.lucene.analyzers-smartcn;bundle-version="5.1.0"
Import-Package: com.ibm.icu.text,
org.eclipse.equinox.http.jetty;resolution:=optional
Bundle-RequiredExecutionEnvironment: J2SE-1.5
diff --git a/org.eclipse.help.base/plugin.xml b/org.eclipse.help.base/plugin.xml
index 07a5a22..4daf3f0 100644
--- a/org.eclipse.help.base/plugin.xml
+++ b/org.eclipse.help.base/plugin.xml
@@ -83,7 +83,7 @@
</analyzer>
<analyzer
locale="zh"
- class="org.apache.lucene.analysis.cn.ChineseAnalyzer">
+ class="org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer">
</analyzer>
<analyzer
locale="cs"
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
index 4ea3b21..704b0e9 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2012 IBM Corporation and others.
+ * Copyright (c) 2012, 2015 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/
package org.eclipse.help.internal.search;

@@ -19,8 +20,9 @@
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
-import org.apache.lucene.util.Version;
-import org.eclipse.core.runtime.*;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.IConfigurationElement;
+import org.eclipse.core.runtime.IExecutableExtension;

/**
* A factory responsible for instantiating a lucene {@link Analyzer}.
@@ -30,27 +32,26 @@
public Analyzer create() {
if (locale == null)
return null;
- Version version = Version.LUCENE_35;
if ("pt".equals(locale)) //$NON-NLS-1$
- return new BrazilianAnalyzer(version);
+ return new BrazilianAnalyzer();
if ("ja".equals(locale)) //$NON-NLS-1$
- return new CJKAnalyzer(version);
+ return new CJKAnalyzer();
if ("ko".equals(locale)) //$NON-NLS-1$
- return new CJKAnalyzer(version);
+ return new CJKAnalyzer();
if ("pt".equals(locale)) //$NON-NLS-1$
- return new BrazilianAnalyzer(version);
+ return new BrazilianAnalyzer();
if ("cs".equals(locale)) //$NON-NLS-1$
- return new CzechAnalyzer(version);
+ return new CzechAnalyzer();
if ("de".equals(locale)) //$NON-NLS-1$
- return new GermanAnalyzer(version);
+ return new GermanAnalyzer();
if ("el".equals(locale)) //$NON-NLS-1$
- return new GreekAnalyzer(version);
+ return new GreekAnalyzer();
if ("fr".equals(locale)) //$NON-NLS-1$
- return new FrenchAnalyzer(version);
+ return new FrenchAnalyzer();
if ("nl".equals(locale)) //$NON-NLS-1$
- return new DutchAnalyzer(version);
+ return new DutchAnalyzer();
if ("ru".equals(locale)) //$NON-NLS-1$
- return new RussianAnalyzer(version);
+ return new RussianAnalyzer();
//unknown language
return null;

diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
index a066aa4..a2183f9 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
@@ -8,18 +8,25 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/
package org.eclipse.help.internal.search;
-import java.io.*;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.ArrayList;
+import java.util.List;

-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
/**
* Lucene Analyzer for English. LowerCaseTokenizer->StopFilter->PorterStemFilter
*/
public final class Analyzer_en extends Analyzer {
+
/**
* Constructor for Analyzer_en.
*/
@@ -27,18 +34,25 @@
super();
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided
+ * Creates a TokenStreamComponents which tokenizes all the text in the provided
* Reader.
*/
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new PorterStemFilter(new StopFilter(Version.LUCENE_30, new LowerCaseAndDigitsTokenizer(reader), getStopWords(), false));
+ @Override
+ public final TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer source;
+ source = new LowerCaseAndDigitsTokenizer();
+ TokenStream result = new EnglishPossessiveFilter(source);
+ result = new StopFilter(result, new CharArraySet(getStopWords(), false));
+ result = new KeywordRepeatFilter(result);
+ result = new PorterStemFilter(result);
+ return new TokenStreamComponents(source, result);
}

- private Set<String> stopWords;
+ private List<String> stopWords;

- private Set<String> getStopWords() {
+ private List<String> getStopWords() {
if ( stopWords == null ) {
- stopWords = new HashSet<String>();
+ stopWords = new ArrayList<String>();
for (int i = 0; i < STOP_WORDS.length; i++) {
stopWords.add(STOP_WORDS[i]);
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index 4109474..e3c8722 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -7,22 +7,20 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/
package org.eclipse.help.internal.search;

-import java.io.Reader;
import java.util.Locale;
import java.util.StringTokenizer;

-import com.ibm.icu.text.BreakIterator;
-
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;

-import org.eclipse.core.runtime.Platform;
+import com.ibm.icu.text.BreakIterator;


/**
@@ -84,11 +82,14 @@
}

/**
- * Creates a TokenStream which tokenizes all the text in the provided
+ * Creates a TokenStreamComponents which tokenizes all the text in the provided
* Reader.
*/
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new LowerCaseFilter(Version.LUCENE_30, new WordTokenStream(fieldName, reader, locale));
+ @Override
+ public final TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new WordTokenStream(locale);
+ LowerCaseFilter filter = new LowerCaseFilter(source);
+ return new TokenStreamComponents(source, filter);
}

/**
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
index a475688..cbb2472 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
@@ -8,27 +8,26 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/
package org.eclipse.help.internal.search;

-import java.io.*;
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.util.CharTokenizer;

/**
* Tokenizer breaking words around letters or digits.
*/
public class LowerCaseAndDigitsTokenizer extends CharTokenizer {

- public LowerCaseAndDigitsTokenizer(Reader input) {
- super(Version.LUCENE_30, input);
+ public LowerCaseAndDigitsTokenizer() {
+ super();
}
protected char normalize(char c) {
return Character.toLowerCase(c);
}

- protected boolean isTokenChar(char c) {
+ @Override
+ public boolean isTokenChar(int c) {
return Character.isLetterOrDigit(c);
}

diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
index 60a545d..804bf5e 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2010, 2011 IBM Corporation and others.
+ * Copyright (c) 2010, 2015 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/

package org.eclipse.help.internal.search;
@@ -16,6 +17,8 @@

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.TextField;
import org.eclipse.help.search.ISearchDocument;

/**
@@ -31,22 +34,22 @@
}

public void setTitle(String title) {
- doc.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$
- doc.add(new Field("exact_title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$
- doc.add(new Field("raw_title", title, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new TextField("title", title, Field.Store.NO)); //$NON-NLS-1$
+ doc.add(new TextField("exact_title", title, Field.Store.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("raw_title", title)); //$NON-NLS-1$
}

public void setSummary(String summary) {
- doc.add(new Field("summary", summary, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("summary", summary)); //$NON-NLS-1$
}

public void addContents(String contents) {
- doc.add(new Field("contents", new StringReader(contents))); //$NON-NLS-1$
- doc.add(new Field("exact_contents", new StringReader(contents))); //$NON-NLS-1$
+ doc.add(new TextField("contents", new StringReader(contents))); //$NON-NLS-1$
+ doc.add(new TextField("exact_contents", new StringReader(contents))); //$NON-NLS-1$
}

public void setHasFilters(boolean hasFilters) {
- doc.add(new Field("filters", Boolean.toString(hasFilters), Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("filters", Boolean.toString(hasFilters))); //$NON-NLS-1$
}

public Document getDocument() {
@@ -54,8 +57,8 @@
}

public void addContents(Reader contents, Reader exactContents) {
- doc.add(new Field("contents", contents)); //$NON-NLS-1$
- doc.add(new Field("exact_contents", exactContents)); //$NON-NLS-1$
+ doc.add(new TextField("contents", contents)); //$NON-NLS-1$
+ doc.add(new TextField("exact_contents", exactContents)); //$NON-NLS-1$
}

}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
index ca9cd67..7952c8d 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2005, 2014 IBM Corporation and others.
+ * Copyright (c) 2005, 2015 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
*******************************************************************************/
package org.eclipse.help.internal.search;

@@ -152,6 +153,7 @@
.getProperty(SearchIndex.DEPENDENCIES_KEY_ANALYZER);
if (!targetIndex.isLuceneCompatible(lucene)
|| !targetIndex.isAnalyzerCompatible(analyzer)) {
+ HelpBasePlugin.logError("Error trying to consume Lucene index from bundle "+bundle.toString()+". Please use an index built with Lucene 5 or higher.", null); //$NON-NLS-1$ //$NON-NLS-2$
return false;
}
} catch (MalformedURLException mue) {
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
|
|
deleted file mode 100644
|
|
index 08cf58a..0000000
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
|
|
+++ /dev/null
|
|
@@ -1,459 +0,0 @@
|
|
-/*******************************************************************************
|
|
- * Copyright (c) 2000, 2012 IBM Corporation and others.
|
|
- * All rights reserved. This program and the accompanying materials
|
|
- * are made available under the terms of the Eclipse Public License v1.0
|
|
- * which accompanies this distribution, and is available at
|
|
- * http://www.eclipse.org/legal/epl-v10.html
|
|
- *
|
|
- * Contributors:
|
|
- * IBM Corporation - initial API and implementation
|
|
- * Chris Torrence - patch for bug Bug 107648
|
|
- *******************************************************************************/
|
|
-package org.eclipse.help.internal.search;
|
|
-import java.io.*;
|
|
-import java.util.ArrayList;
|
|
-import java.util.Collection;
|
|
-import java.util.Iterator;
|
|
-import java.util.List;
|
|
-import java.util.Locale;
|
|
-import java.util.StringTokenizer;
|
|
-
|
|
-import org.apache.lucene.analysis.*;
|
|
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
-import org.apache.lucene.index.*;
|
|
-import org.apache.lucene.search.*;
|
|
-import org.eclipse.help.internal.base.*;
|
|
-/**
|
|
- * Build query acceptable by the search engine.
|
|
- */
|
|
-public class QueryBuilder {
|
|
- // Maximum allowed number of terms
|
|
- private static final int MAX_TERMS = 10;
|
|
- // Maximum allowed number of ORs
|
|
- private static final int MAX_UNIONS = 4;
|
|
- // Maximum allowed number terms with wild cards
|
|
- private static final int MAX_WILD_TERMS = 2;
|
|
- // Query from user
|
|
- private String searchWords;
|
|
- // Descriptor of Analyzer to process the query words
|
|
- private AnalyzerDescriptor analyzerDesc;
|
|
- // Analyzer to process the query words
|
|
- private Analyzer analyzer;
|
|
- // List of QueryWordsToken
|
|
- private List<QueryWordsToken> analyzedTokens;
|
|
- // List of words to highlight
|
|
- private List<String> highlightWords = new ArrayList<String>();
|
|
- private Locale locale;
|
|
- /**
|
|
- * Creates a query builder for the search word. The search word is processed
|
|
- * by a lexical analyzer.
|
|
- */
|
|
- public QueryBuilder(String searchWords, AnalyzerDescriptor analyzerDesc) {
|
|
- this.searchWords = searchWords;
|
|
- String language = analyzerDesc.getLang();
|
|
- if (language.length() >= 5) {
|
|
- this.locale = new Locale(language.substring(0, 2), language
|
|
- .substring(3, 5));
|
|
- } else {
|
|
- this.locale = new Locale(language.substring(0, 2), ""); //$NON-NLS-1$
|
|
- }
|
|
- this.analyzerDesc = analyzerDesc;
|
|
- this.analyzer = analyzerDesc.getAnalyzer();
|
|
- }
|
|
- /**
|
|
- * Splits user query into tokens and returns a list of QueryWordsToken's.
|
|
- */
|
|
- private List<QueryWordsToken> tokenizeUserQuery(String searchWords) {
|
|
- List<QueryWordsToken> tokenList = new ArrayList<QueryWordsToken>();
|
|
- //Divide along quotation marks
|
|
- //StringTokenizer qTokenizer = new StringTokenizer(searchWords.trim(),
|
|
- // "\"", true); //$NON-NLS-1$
|
|
- boolean withinQuotation = false;
|
|
- String quotedString = ""; //$NON-NLS-1$
|
|
- int termCount = 0;// keep track of number of terms to disallow too many
|
|
-
|
|
- int fromIndex = -1;
|
|
- searchWords = searchWords.trim();
|
|
- while((fromIndex = searchWords.indexOf("\"", fromIndex+1))!= -1){ //$NON-NLS-1$
|
|
- withinQuotation = !withinQuotation;
|
|
- }
|
|
- if( withinQuotation ) {
|
|
- searchWords = searchWords + "\""; //$NON-NLS-1$
|
|
- withinQuotation = !withinQuotation;
|
|
- }
|
|
-
|
|
- StringTokenizer qTokenizer = new StringTokenizer(searchWords,"\"",true); //$NON-NLS-1$
|
|
- int orCount = 0; // keep track of number of ORs to disallow too many
|
|
- while (qTokenizer.hasMoreTokens()) {
|
|
- String curToken = qTokenizer.nextToken();
|
|
- if (curToken.equals("\"")) { //$NON-NLS-1$
|
|
- if (withinQuotation) {
|
|
- // check for too many terms
|
|
- if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
|
|
- && ++termCount > MAX_TERMS) {
|
|
- throw new QueryTooComplexException();
|
|
- }
|
|
- tokenList.add(QueryWordsToken.exactPhrase(quotedString));
|
|
- } else {
|
|
- quotedString = ""; //$NON-NLS-1$
|
|
- }
|
|
- withinQuotation = !withinQuotation;
|
|
- continue;
|
|
- } else if (withinQuotation) {
|
|
- quotedString = curToken;
|
|
- continue;
|
|
- } else {
|
|
- //divide unquoted strings along white space
|
|
- StringTokenizer parser = new StringTokenizer(curToken.trim());
|
|
- while (parser.hasMoreTokens()) {
|
|
- String token = parser.nextToken();
|
|
- if (token.equalsIgnoreCase(QueryWordsToken.AND().value)) {
|
|
- tokenList.add(QueryWordsToken.AND());
|
|
- } else if (token
|
|
- .equalsIgnoreCase(QueryWordsToken.OR().value)) {
|
|
- // Check for too many OR terms
|
|
- if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
|
|
- && ++orCount > MAX_UNIONS) {
|
|
- throw new QueryTooComplexException();
|
|
- }
|
|
- tokenList.add(QueryWordsToken.OR());
|
|
- } else if (token
|
|
- .equalsIgnoreCase(QueryWordsToken.NOT().value)) {
|
|
- tokenList.add(QueryWordsToken.NOT());
|
|
- } else {
|
|
- // check for too many terms
|
|
- if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
|
|
- && ++termCount > MAX_TERMS) {
|
|
- throw new QueryTooComplexException();
|
|
- }
|
|
- tokenList.add(QueryWordsToken.word(token));
|
|
- }
|
|
- }
|
|
- }
|
|
- }
|
|
- return tokenList;
|
|
- }
|
|
- /**
|
|
- * Apply the Analyzer to the search tokens and return the list of processed
|
|
- * QueryWordsToken's.
|
|
- */
|
|
- private List<QueryWordsToken> analyzeTokens(List<QueryWordsToken> tokens) {
|
|
- boolean isTokenAfterNot = false;
|
|
- List<QueryWordsToken> newTokens = new ArrayList<QueryWordsToken>();
|
|
- int wildCardTermCount = 0;
|
|
- for (int i = 0; i < tokens.size(); i++) {
|
|
- QueryWordsToken token = tokens.get(i);
|
|
- if (token.type == QueryWordsToken.WORD) {
|
|
- int questionMIndex = token.value.indexOf('?');
|
|
- int starIndex = token.value.indexOf('*');
|
|
- if (starIndex >= 0 || questionMIndex >= 0) {
|
|
- if (BaseHelpSystem.getMode() == BaseHelpSystem.MODE_INFOCENTER
|
|
- && ++wildCardTermCount > MAX_WILD_TERMS) {
|
|
- throw new QueryTooComplexException();
|
|
- }
|
|
- newTokens.add(QueryWordsToken.word(token.value
|
|
- .toLowerCase(locale)));
|
|
- // add word to the list of words to highlight
|
|
- if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
|
|
- highlightWords.add(token.value);
|
|
- }
|
|
- } else {
|
|
- List<String> wordList = analyzeText(analyzer, "contents", //$NON-NLS-1$
|
|
- token.value);
|
|
- if (wordList.size() > 0) {
|
|
- if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
|
|
- // add original word to the list of words to
|
|
- // highlight
|
|
- highlightWords.add(token.value);
|
|
- }
|
|
- if (wordList.size() == 1) {
|
|
- String word = wordList.get(0);
|
|
- newTokens.add(QueryWordsToken.word(word));
|
|
- // add analyzed word to the list of words to
|
|
- // highlight
|
|
- // this is required to highlight stemmed words
|
|
- if (!isTokenAfterNot && !highlightWords.contains(word)) {
|
|
- highlightWords.add(word);
|
|
- }
|
|
- } else {
|
|
- QueryWordsPhrase phrase = QueryWordsToken.phrase();
|
|
- for (Iterator<String> it = wordList.iterator(); it
|
|
- .hasNext();) {
|
|
- String word = it.next();
|
|
- phrase.addWord(word);
|
|
- // add each analyzed word to the list of words
|
|
- // to highlight
|
|
- // this is only required to highlight stemmed
|
|
- // words.
|
|
- // Adding words should not be done when
|
|
- // DefaultAnalyzer is used,
|
|
- // because it does not perform stemming and
|
|
- // common words removal
|
|
- // which would result in common characters
|
|
- // highlighted all over (bug 30263)
|
|
- if (!analyzerDesc.getId().startsWith(
|
|
- HelpBasePlugin.PLUGIN_ID + "#")) { //$NON-NLS-1$
|
|
- if (!isTokenAfterNot && !highlightWords.contains(word)) {
|
|
- highlightWords.add(word);
|
|
- }
|
|
- }
|
|
- }
|
|
- newTokens.add(phrase);
|
|
- }
|
|
- }
|
|
- }
|
|
- } else if (// forget ANDs
|
|
- /*
|
|
- * token.type == SearchQueryToken.AND ||
|
|
- */
|
|
- token.type == QueryWordsToken.OR
|
|
- || token.type == QueryWordsToken.NOT)
|
|
- newTokens.add(token);
|
|
- else if (token.type == QueryWordsToken.EXACT_PHRASE) {
|
|
- List<String> wordList = analyzeText(analyzer, "exact_contents", //$NON-NLS-1$
|
|
- token.value);
|
|
- if (wordList.size() > 0) {
|
|
- if (!isTokenAfterNot && !highlightWords.contains(token.value)) {
|
|
- // add original word to the list of words to highlight
|
|
- highlightWords.add(token.value);
|
|
- }
|
|
- }
|
|
- QueryWordsExactPhrase phrase = QueryWordsToken.exactPhrase();
|
|
- for (Iterator<String> it = wordList.iterator(); it.hasNext();) {
|
|
- String word = it.next();
|
|
- phrase.addWord(word);
|
|
- // add analyzed word to the list of words to highlight
|
|
- // if (!highlightWords.contains(word))
|
|
- // highlightWords.add(word);
|
|
- }
|
|
- // add phrase only if not empty
|
|
- if (phrase.getWords().size() > 0) {
|
|
- newTokens.add(phrase);
|
|
- }
|
|
- }
|
|
- isTokenAfterNot = (token.type == QueryWordsToken.NOT);
|
|
- }
|
|
- return newTokens;
|
|
- }
|
|
- /**
|
|
- * Get a list of tokens corresponding to a search word or phrase
|
|
- *
|
|
- * @return List of String
|
|
- */
|
|
- private List<String> analyzeText(Analyzer analyzer, String fieldName, String text) {
|
|
- List<String> words = new ArrayList<String>(1);
|
|
- Reader reader = new StringReader(text);
|
|
- TokenStream tStream = analyzer.tokenStream(fieldName, reader);
|
|
-
|
|
- CharTermAttribute termAttribute = tStream.getAttribute(CharTermAttribute.class);
|
|
- try {
|
|
- while (tStream.incrementToken()) {
|
|
- String term = termAttribute.toString();
|
|
- words.add(term);
|
|
- }
|
|
- reader.close();
|
|
- } catch (IOException ioe) {
|
|
- }
|
|
-
|
|
- return words;
|
|
- }
|
|
- /**
|
|
- * Obtains Lucene Query from tokens
|
|
- *
|
|
- * @return Query or null if no query could be created
|
|
- */
|
|
- private Query createLuceneQuery(List<QueryWordsToken> searchTokens, String[] fieldNames,
|
|
- float[] boosts) {
|
|
- // Get queries for parts separated by OR
|
|
- List<Query> requiredQueries = getRequiredQueries(searchTokens, fieldNames,
|
|
- boosts);
|
|
- if (requiredQueries.size() == 0)
|
|
- return null;
|
|
- else if (requiredQueries.size() <= 1)
|
|
- return requiredQueries.get(0);
|
|
- else
|
|
- /* if (requiredQueries.size() > 1) */
|
|
- // OR queries
|
|
- return (orQueries(requiredQueries));
|
|
- }
|
|
- /**
|
|
- * Obtains Lucene queries for token sequences separated at OR.
|
|
- *
|
|
- * @return List of Query (could be empty)
|
|
- */
|
|
- private List<Query> getRequiredQueries(List<QueryWordsToken> tokens, String[] fieldNames,
|
|
- float[] boosts) {
|
|
- List<Query> oredQueries = new ArrayList<Query>();
|
|
- ArrayList<QueryWordsToken> requiredQueryTokens = new ArrayList<QueryWordsToken>();
|
|
- for (int i = 0; i < tokens.size(); i++) {
|
|
- QueryWordsToken token = tokens.get(i);
|
|
- if (token.type != QueryWordsToken.OR) {
|
|
- requiredQueryTokens.add(token);
|
|
- } else {
|
|
- Query reqQuery = getRequiredQuery(requiredQueryTokens,
|
|
- fieldNames, boosts);
|
|
- if (reqQuery != null)
|
|
- oredQueries.add(reqQuery);
|
|
- requiredQueryTokens = new ArrayList<QueryWordsToken>();
|
|
- }
|
|
- }
|
|
- Query reqQuery = getRequiredQuery(requiredQueryTokens, fieldNames,
|
|
- boosts);
|
|
- if (reqQuery != null)
|
|
- oredQueries.add(reqQuery);
|
|
- return oredQueries;
|
|
- }
|
|
- private Query orQueries(Collection<Query> queries) {
|
|
- BooleanQuery bq = new BooleanQuery();
|
|
- for (Iterator<Query> it = queries.iterator(); it.hasNext();) {
|
|
- Query q = it.next();
|
|
- bq.add(q, BooleanClause.Occur.SHOULD);
|
|
- }
|
|
- return bq;
|
|
- }
|
|
- /**
|
|
- * Obtains Lucene Query for tokens containing only AND and NOT operators.
|
|
- *
|
|
- * @return BooleanQuery or null if no query could be created from the tokens
|
|
- */
|
|
- private Query getRequiredQuery(List<QueryWordsToken> requiredTokens, String[] fieldNames,
|
|
- float[] boosts) {
|
|
- BooleanQuery retQuery = new BooleanQuery();
|
|
- boolean requiredTermExist = false;
|
|
- // Parse tokens left to right
|
|
- QueryWordsToken operator = null;
|
|
- for (int i = 0; i < requiredTokens.size(); i++) {
|
|
- QueryWordsToken token = requiredTokens.get(i);
|
|
- if (token.type == QueryWordsToken.AND
|
|
- || token.type == QueryWordsToken.NOT) {
|
|
- operator = token;
|
|
- continue;
|
|
- }
|
|
- // Creates queries for all fields
|
|
- Query qs[] = new Query[fieldNames.length];
|
|
- for (int f = 0; f < fieldNames.length; f++) {
|
|
- qs[f] = token.createLuceneQuery(fieldNames[f], boosts[f]);
|
|
- }
|
|
- // creates the boolean query of all fields
|
|
- Query q = qs[0];
|
|
- if (fieldNames.length > 1) {
|
|
- BooleanQuery allFieldsQuery = new BooleanQuery();
|
|
- for (int f = 0; f < fieldNames.length; f++)
|
|
- allFieldsQuery.add(qs[f], BooleanClause.Occur.SHOULD);
|
|
- q = allFieldsQuery;
|
|
- }
|
|
- if (operator != null && operator.type == QueryWordsToken.NOT) {
|
|
- retQuery.add(q, BooleanClause.Occur.MUST_NOT); // add as prohibited
|
|
- } else {
|
|
- retQuery.add(q, BooleanClause.Occur.MUST); // add as required
|
|
- requiredTermExist = true;
|
|
- }
|
|
- }
|
|
- if (!requiredTermExist) {
|
|
- return null; // cannot search for prohibited only
|
|
- }
|
|
- return retQuery;
|
|
- }
|
|
- private Query getLuceneQuery(String[] fieldNames, float[] boosts) {
|
|
- Query luceneQuery = createLuceneQuery(analyzedTokens, fieldNames,
|
|
- boosts);
|
|
- return luceneQuery;
|
|
- }
|
|
- /**
|
|
- * @param fieldNames -
|
|
- * Collection of field names of type String (e.g. "h1"); the
|
|
- * search will be performed on the given fields
|
|
- * @param fieldSearchOnly -
|
|
- * boolean indicating if field only search should be performed;
|
|
- * if set to false, default field "contents" and all other fields
|
|
- * will be searched
|
|
- */
|
|
- public Query getLuceneQuery(Collection<String> fieldNames, boolean fieldSearchOnly)
|
|
- throws QueryTooComplexException {
|
|
- // split search query into tokens
|
|
- List<QueryWordsToken> userTokens = tokenizeUserQuery(searchWords);
|
|
- analyzedTokens = analyzeTokens(userTokens);
|
|
- return buildLuceneQuery(fieldNames, fieldSearchOnly);
|
|
- }
|
|
- /**
|
|
- * @param fieldNames -
|
|
- * Collection of field names of type String (e.g. "h1"); the
|
|
- * search will be performed on the given fields
|
|
- * @param fieldSearchOnly -
|
|
- * boolean indicating if field only search should be performed;
|
|
- * if set to false, default field "contents" and all other fields
|
|
- * will be searched
|
|
- */
|
|
- private Query buildLuceneQuery(Collection<String> fieldNames,
|
|
- boolean fieldSearchOnly) {
|
|
- String[] fields;
|
|
- float[] boosts;
|
|
- if (fieldSearchOnly) {
|
|
- fields = new String[fieldNames.size()];
|
|
- boosts = new float[fieldNames.size()];
|
|
- Iterator<String> fieldNamesIt = fieldNames.iterator();
|
|
- for (int i = 0; i < fieldNames.size(); i++) {
|
|
- fields[i] = fieldNamesIt.next();
|
|
- boosts[i] = 5.0f;
|
|
- }
|
|
- } else {
|
|
- fields = new String[fieldNames.size() + 2];
|
|
- boosts = new float[fieldNames.size() + 2];
|
|
- Iterator<String> fieldNamesIt = fieldNames.iterator();
|
|
- for (int i = 0; i < fieldNames.size(); i++) {
|
|
- fields[i] = fieldNamesIt.next();
|
|
- boosts[i] = 5.0f;
|
|
- }
|
|
- fields[fieldNames.size()] = "contents"; //$NON-NLS-1$
|
|
- boosts[fieldNames.size()] = 1.0f;
|
|
- fields[fieldNames.size()+1] = "title"; //$NON-NLS-1$
|
|
- boosts[fieldNames.size()+1] = 1.0f;
|
|
- }
|
|
- Query query = getLuceneQuery(fields, boosts);
|
|
- query = improveRankingForUnqotedPhrase(query, fields, boosts);
|
|
- return query;
|
|
- }
|
|
- /**
|
|
- * If user query contained only words (no quotaions nor operators) extends
|
|
- * query with term phrase representing entire user query i.e for user string
|
|
- * a b, the query a AND b will be extended to "a b" OR a AND b
|
|
- */
|
|
- private Query improveRankingForUnqotedPhrase(Query query, String[] fields,
|
|
- float[] boosts) {
|
|
- if (query == null)
|
|
- return query;
|
|
- // check if all tokens are words
|
|
- for (int i = 0; i < analyzedTokens.size(); i++)
|
|
- if (analyzedTokens.get(i).type != QueryWordsToken.WORD)
|
|
- return query;
|
|
- // Create phrase query for all tokens and OR with original query
|
|
- BooleanQuery booleanQuery = new BooleanQuery();
|
|
- booleanQuery.add(query, BooleanClause.Occur.SHOULD);
|
|
- PhraseQuery[] phraseQueries = new PhraseQuery[fields.length];
|
|
- for (int f = 0; f < fields.length; f++) {
|
|
- phraseQueries[f] = new PhraseQuery();
|
|
- for (int i = 0; i < analyzedTokens.size(); i++) {
|
|
- Term t = new Term(fields[f], analyzedTokens
|
|
- .get(i).value);
|
|
- phraseQueries[f].add(t);
|
|
- }
|
|
- phraseQueries[f].setBoost(10 * boosts[f]);
|
|
- booleanQuery.add(phraseQueries[f], BooleanClause.Occur.SHOULD);
|
|
- }
|
|
- return booleanQuery;
|
|
- }
|
|
- /**
|
|
- * Obtains analyzed terms from query as one string. Words are double quoted,
|
|
- * and separated by space. The analyzed words are needed for highlighting
|
|
- * word roots.
|
|
- */
|
|
- public String gethighlightTerms() {
|
|
- StringBuffer buf = new StringBuffer();
|
|
- for (Iterator<String> it = highlightWords.iterator(); it.hasNext();) {
|
|
- buf.append('"');
|
|
- buf.append(it.next());
|
|
- buf.append("\" "); //$NON-NLS-1$
|
|
- }
|
|
- return buf.toString();
|
|
- }
|
|
-}
|
|
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java
|
|
deleted file mode 100644
|
|
index 324b8e1..0000000
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java
|
|
+++ /dev/null
|
|
@@ -1,50 +0,0 @@
|
|
-/*******************************************************************************
|
|
- * Copyright (c) 2000, 2011 IBM Corporation and others.
|
|
- * All rights reserved. This program and the accompanying materials
|
|
- * are made available under the terms of the Eclipse Public License v1.0
|
|
- * which accompanies this distribution, and is available at
|
|
- * http://www.eclipse.org/legal/epl-v10.html
|
|
- *
|
|
- * Contributors:
|
|
- * IBM Corporation - initial API and implementation
|
|
- *******************************************************************************/
|
|
-package org.eclipse.help.internal.search;
|
|
-import java.util.ArrayList;
|
|
-import java.util.Iterator;
|
|
-import java.util.List;
|
|
-
|
|
-import org.apache.lucene.index.*;
|
|
-import org.apache.lucene.search.*;
|
|
-/**
|
|
- * Represents a quoted token in user search query words
|
|
- */
|
|
-public class QueryWordsExactPhrase extends QueryWordsToken {
|
|
- private List<String> words;
|
|
- public QueryWordsExactPhrase() {
|
|
- super(QueryWordsToken.EXACT_PHRASE, ""); //$NON-NLS-1$
|
|
- words = new ArrayList<String>();
|
|
- }
|
|
- public void addWord(String word) {
|
|
- words.add(word);
|
|
- if (words.size() <= 1)
|
|
- value = word;
|
|
- else
|
|
- value += " " + word; //$NON-NLS-1$
|
|
- }
|
|
- public List<String> getWords() {
|
|
- return words;
|
|
- }
|
|
- /**
|
|
- * Creates a lucene query for a field
|
|
- */
|
|
- public Query createLuceneQuery(String field, float boost) {
|
|
- PhraseQuery q = new PhraseQuery();
|
|
- for (Iterator<String> it = getWords().iterator(); it.hasNext();) {
|
|
- String word = it.next();
|
|
- Term t = new Term("exact_" + field, word); //$NON-NLS-1$
|
|
- q.add(t);
|
|
- q.setBoost(boost);
|
|
- }
|
|
- return q;
|
|
- }
|
|
-}
|
|
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java
|
|
deleted file mode 100644
|
|
index 8a94e89..0000000
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java
|
|
+++ /dev/null
|
|
@@ -1,51 +0,0 @@
|
|
-/*******************************************************************************
|
|
- * Copyright (c) 2000, 2011 IBM Corporation and others.
|
|
- * All rights reserved. This program and the accompanying materials
|
|
- * are made available under the terms of the Eclipse Public License v1.0
|
|
- * which accompanies this distribution, and is available at
|
|
- * http://www.eclipse.org/legal/epl-v10.html
|
|
- *
|
|
- * Contributors:
|
|
- * IBM Corporation - initial API and implementation
|
|
- *******************************************************************************/
|
|
-package org.eclipse.help.internal.search;
|
|
-import java.util.ArrayList;
|
|
-import java.util.Iterator;
|
|
-import java.util.List;
|
|
-
|
|
-import org.apache.lucene.index.*;
|
|
-import org.apache.lucene.search.*;
|
|
-/**
|
|
- * Represents a phrase (not quoted) token in user search query words It consists
|
|
- * of several words created by an analyzer
|
|
- */
|
|
-public class QueryWordsPhrase extends QueryWordsToken {
|
|
- private List<String> words;
|
|
- public QueryWordsPhrase() {
|
|
- super(QueryWordsToken.PHRASE, ""); //$NON-NLS-1$
|
|
- words = new ArrayList<String>();
|
|
- }
|
|
- public void addWord(String word) {
|
|
- words.add(word);
|
|
- if (words.size() <= 1)
|
|
- value = word;
|
|
- else
|
|
- value += " " + word; //$NON-NLS-1$
|
|
- }
|
|
- public List<String> getWords() {
|
|
- return words;
|
|
- }
|
|
- /**
|
|
- * Creates a lucene query for a field
|
|
- */
|
|
- public Query createLuceneQuery(String field, float boost) {
|
|
- PhraseQuery q = new PhraseQuery();
|
|
- for (Iterator<String> it = getWords().iterator(); it.hasNext();) {
|
|
- String word = it.next();
|
|
- Term t = new Term(field, word);
|
|
- q.add(t);
|
|
- q.setBoost(boost);
|
|
- }
|
|
- return q;
|
|
- }
|
|
-}
|
|
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java
|
|
deleted file mode 100644
|
|
index 6ba76f2..0000000
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java
|
|
+++ /dev/null
|
|
@@ -1,81 +0,0 @@
|
|
-/*******************************************************************************
|
|
- * Copyright (c) 2000, 2007 IBM Corporation and others.
|
|
- * All rights reserved. This program and the accompanying materials
|
|
- * are made available under the terms of the Eclipse Public License v1.0
|
|
- * which accompanies this distribution, and is available at
|
|
- * http://www.eclipse.org/legal/epl-v10.html
|
|
- *
|
|
- * Contributors:
|
|
- * IBM Corporation - initial API and implementation
|
|
- *******************************************************************************/
|
|
-package org.eclipse.help.internal.search;
|
|
-import org.apache.lucene.index.*;
|
|
-import org.apache.lucene.search.*;
|
|
-/**
|
|
- * Represents a token in user search query words
|
|
- */
|
|
-public class QueryWordsToken {
|
|
- public static final int AND = 0;
|
|
- public static final int OR = 1;
|
|
- public static final int NOT = 2;
|
|
- public static final int EXACT_PHRASE = 3;
|
|
- public static final int PHRASE = 4;
|
|
- public static final int WORD = 5;
|
|
- private static final QueryWordsToken fAND = new QueryWordsToken(AND, "AND"); //$NON-NLS-1$
|
|
- private static final QueryWordsToken fOR = new QueryWordsToken(OR, "OR"); //$NON-NLS-1$
|
|
- private static final QueryWordsToken fNOT = new QueryWordsToken(NOT, "NOT"); //$NON-NLS-1$
|
|
- public int type;
|
|
- public String value;
|
|
- protected QueryWordsToken(int type, String value) {
|
|
- this.type = type;
|
|
- this.value = value;
|
|
- }
|
|
- /**
|
|
- * Creates a lucene query for a field
|
|
- */
|
|
- public Query createLuceneQuery(String field, float boost) {
|
|
- Query q;
|
|
- int questionPos = value.indexOf('?');
|
|
- int starPos = value.indexOf('*');
|
|
- if (questionPos >= 0 || starPos >= 0) {
|
|
- if (questionPos == -1 && starPos == value.length() - 1) {
|
|
- Term t = new Term("exact_" + field, value.substring(0, starPos)); //$NON-NLS-1$
|
|
- q = new PrefixQuery(t);
|
|
- ((PrefixQuery) q).setBoost(boost);
|
|
- } else {
|
|
- Term t = new Term("exact_" + field, value); //$NON-NLS-1$
|
|
- q = new WildcardQuery(t);
|
|
- ((WildcardQuery) q).setBoost(boost);
|
|
- }
|
|
- } else {
|
|
- Term t = new Term(field, value);
|
|
- q = new TermQuery(t);
|
|
- ((TermQuery) q).setBoost(boost);
|
|
- }
|
|
- // after updating Lucene, set boost on a Query class
|
|
- return q;
|
|
- }
|
|
- public static QueryWordsToken AND() {
|
|
- return fAND;
|
|
- }
|
|
- public static QueryWordsToken OR() {
|
|
- return fOR;
|
|
- }
|
|
- public static QueryWordsToken NOT() {
|
|
- return fNOT;
|
|
- }
|
|
- public static QueryWordsToken word(String word) {
|
|
- return new QueryWordsToken(QueryWordsToken.WORD, word);
|
|
- }
|
|
- public static QueryWordsPhrase phrase() {
|
|
- return new QueryWordsPhrase();
|
|
- }
|
|
- public static QueryWordsExactPhrase exactPhrase() {
|
|
- return new QueryWordsExactPhrase();
|
|
- }
|
|
- public static QueryWordsExactPhrase exactPhrase(String word) {
|
|
- QueryWordsExactPhrase token = new QueryWordsExactPhrase();
|
|
- token.addWord(word);
|
|
- return token;
|
|
- }
|
|
-}
|
|
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
|
|
index 67963a5..131ac65 100644
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
|
|
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
|
|
@@ -1,14 +1,13 @@
|
|
/*******************************************************************************
|
|
- * Copyright (c) 2000, 2014 IBM Corporation and others.
|
|
- * All rights reserved. This program and the accompanying materials
|
|
- * are made available under the terms of the Eclipse Public License v1.0
|
|
+ * Copyright (c) 2000, 2015 IBM Corporation and others. All rights reserved. This program and the
|
|
+ * accompanying materials are made available under the terms of the Eclipse Public License v1.0
|
|
* which accompanies this distribution, and is available at
|
|
* http://www.eclipse.org/legal/epl-v10.html
|
|
*
|
|
- * Contributors:
|
|
- * IBM Corporation - initial API and implementation
|
|
- * Holger Voormann - fix for bug 426785 (http://eclip.se/426785)
|
|
- * Alexander Kurtakov - Bug 460787
|
|
+ * Contributors: IBM Corporation - initial API and implementation
|
|
+ * Holger Voormann - fix for bug 426785 (http://eclip.se/426785)
|
|
+ * Alexander Kurtakov - Bug 460787
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
package org.eclipse.help.internal.search;
|
|
|
|
@@ -17,6 +16,7 @@
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.RandomAccessFile;
|
|
+import java.io.StringReader;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.nio.channels.FileLock;
|
|
@@ -33,19 +33,30 @@
|
|
import java.util.zip.ZipEntry;
|
|
import java.util.zip.ZipInputStream;
|
|
|
|
-import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
|
|
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
|
|
import org.apache.lucene.document.Document;
|
|
import org.apache.lucene.document.Field;
|
|
+import org.apache.lucene.document.StoredField;
|
|
+import org.apache.lucene.document.StringField;
|
|
+import org.apache.lucene.index.DirectoryReader;
|
|
import org.apache.lucene.index.IndexReader;
|
|
import org.apache.lucene.index.IndexWriter;
|
|
-import org.apache.lucene.index.Term;
|
|
-import org.apache.lucene.index.TermDocs;
|
|
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
import org.apache.lucene.index.IndexWriterConfig;
|
|
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
+import org.apache.lucene.index.LeafReader;
|
|
import org.apache.lucene.index.LogByteSizeMergePolicy;
|
|
import org.apache.lucene.index.LogMergePolicy;
|
|
+import org.apache.lucene.index.PostingsEnum;
|
|
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
|
+import org.apache.lucene.index.Term;
|
|
+import org.apache.lucene.queryparser.classic.FastCharStream;
|
|
+import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
|
+import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
|
+import org.apache.lucene.queryparser.classic.Token;
|
|
+import org.apache.lucene.search.BooleanClause.Occur;
|
|
import org.apache.lucene.search.BooleanQuery;
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
+import org.apache.lucene.search.MultiTermQuery;
|
|
import org.apache.lucene.search.Query;
|
|
import org.apache.lucene.search.TopDocs;
|
|
import org.apache.lucene.store.Directory;
|
|
@@ -59,7 +70,6 @@
|
|
import org.eclipse.core.runtime.OperationCanceledException;
|
|
import org.eclipse.core.runtime.Platform;
|
|
import org.eclipse.core.runtime.Status;
|
|
-import org.eclipse.help.internal.HelpPlugin;
|
|
import org.eclipse.help.internal.base.BaseHelpSystem;
|
|
import org.eclipse.help.internal.base.HelpBasePlugin;
|
|
import org.eclipse.help.internal.base.util.HelpProperties;
|
|
@@ -86,7 +96,7 @@
|
|
|
|
private File indexDir;
|
|
|
|
- private Directory luceneDirectory;
|
|
+ public Directory luceneDirectory;
|
|
|
|
private String locale;
|
|
|
|
@@ -133,7 +143,7 @@
|
|
private Collection<Thread> searches = new ArrayList<Thread>();
|
|
|
|
private FileLock lock;
|
|
- private RandomAccessFile raf = null;
|
|
+ private RandomAccessFile raf = null;
|
|
|
|
/**
|
|
* Constructor.
|
|
@@ -170,7 +180,7 @@
|
|
inconsistencyFile = new File(indexDir.getParentFile(), locale + ".inconsistent"); //$NON-NLS-1$
|
|
htmlSearchParticipant = new HTMLSearchParticipant(indexDir.getAbsolutePath());
|
|
try {
|
|
- luceneDirectory = new NIOFSDirectory(indexDir);
|
|
+ luceneDirectory = new NIOFSDirectory(indexDir.toPath());
|
|
} catch (IOException e) {
|
|
}
|
|
if (!exists()) {
|
|
@@ -203,11 +213,11 @@
|
|
public IStatus addDocument(String name, URL url) {
|
|
try {
|
|
Document doc = new Document();
|
|
- doc.add(new Field(FIELD_NAME, name, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
+ doc.add(new StringField(FIELD_NAME, name, Field.Store.YES));
|
|
addExtraFields(doc);
|
|
String pluginId = LocalSearchManager.getPluginId(name);
|
|
if (relativePath != null) {
|
|
- doc.add(new Field(FIELD_INDEX_ID, relativePath, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
+ doc.add(new StringField(FIELD_INDEX_ID, relativePath, Field.Store.YES));
|
|
}
|
|
// check for the explicit search participant.
|
|
SearchParticipant participant = null;
|
|
@@ -220,20 +230,22 @@
|
|
if (participant == null)
|
|
participant = BaseHelpSystem.getLocalSearchManager().getParticipant(pluginId, name);
|
|
if (participant != null) {
|
|
- IStatus status = participant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc));
|
|
+ IStatus status = participant.addDocument(this, pluginId, name, url, id,
|
|
+ new LuceneSearchDocument(doc));
|
|
if (status.getSeverity() == IStatus.OK) {
|
|
String filters = doc.get("filters"); //$NON-NLS-1$
|
|
indexedDocs.put(name, filters != null ? filters : "0"); //$NON-NLS-1$
|
|
if (id != null)
|
|
- doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
|
|
+ doc.add(new StoredField("id", id)); //$NON-NLS-1$
|
|
if (pid != null)
|
|
- doc.add(new Field("participantId", pid, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
|
|
+ doc.add(new StoredField("participantId", pid)); //$NON-NLS-1$
|
|
iw.addDocument(doc);
|
|
}
|
|
return status;
|
|
}
|
|
// default to html
|
|
- IStatus status = htmlSearchParticipant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc));
|
|
+ IStatus status = htmlSearchParticipant.addDocument(this, pluginId, name, url, id,
|
|
+ new LuceneSearchDocument(doc));
|
|
if (status.getSeverity() == IStatus.OK) {
|
|
String filters = doc.get("filters"); //$NON-NLS-1$
|
|
indexedDocs.put(name, filters != null ? filters : "0"); //$NON-NLS-1$
|
|
@@ -245,20 +257,21 @@
|
|
"IO exception occurred while adding document " + name //$NON-NLS-1$
|
|
+ " to index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$
|
|
e);
|
|
- }
|
|
- catch (Exception e) {
|
|
+ } catch (Exception e) {
|
|
return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
|
|
"An unexpected internal error occurred while adding document " //$NON-NLS-1$
|
|
+ name + " to index " + indexDir.getAbsolutePath() //$NON-NLS-1$
|
|
- + ".", e); //$NON-NLS-1$
|
|
+ + ".", //$NON-NLS-1$
|
|
+ e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
- * Add any extra fields that need to be added to this document. Subclasses
|
|
- * should override to add more fields.
|
|
+ * Add any extra fields that need to be added to this document. Subclasses should override to
|
|
+ * add more fields.
|
|
*
|
|
- * @param doc the document to add fields to
|
|
+ * @param doc
|
|
+ * the document to add fields to
|
|
*/
|
|
protected void addExtraFields(Document doc) {
|
|
}
|
|
@@ -282,8 +295,9 @@
|
|
indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir);
|
|
indexedDocs.restore();
|
|
setInconsistent(true);
|
|
- LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 1000000);
|
|
- IndexWriterConfig writerConfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer);
|
|
+ LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(),
|
|
+ 1000000);
|
|
+ IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
|
|
writerConfig.setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);
|
|
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
|
|
mergePolicy.setMergeFactor(20);
|
|
@@ -307,7 +321,7 @@
|
|
indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir);
|
|
indexedDocs.restore();
|
|
setInconsistent(true);
|
|
- ir = IndexReader.open(luceneDirectory, false);
|
|
+ ir = DirectoryReader.open(luceneDirectory);
|
|
return true;
|
|
} catch (IOException e) {
|
|
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
|
|
@@ -323,7 +337,7 @@
|
|
if (ir != null) {
|
|
ir.close();
|
|
}
|
|
- ir = IndexReader.open(luceneDirectory, false);
|
|
+ ir = DirectoryReader.open(luceneDirectory);
|
|
return true;
|
|
} catch (IOException e) {
|
|
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
|
|
@@ -334,14 +348,14 @@
|
|
/**
|
|
* Deletes a single document from the index.
|
|
*
|
|
- * @param name -
|
|
- * document name
|
|
+ * @param name
|
|
+ * - document name
|
|
* @return IStatus
|
|
*/
|
|
public IStatus removeDocument(String name) {
|
|
Term term = new Term(FIELD_NAME, name);
|
|
try {
|
|
- ir.deleteDocuments(term);
|
|
+ iw.deleteDocuments(term);
|
|
indexedDocs.remove(name);
|
|
} catch (IOException e) {
|
|
return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
|
|
@@ -375,11 +389,11 @@
|
|
}
|
|
|
|
/*
|
|
- * The searcher's index reader has it's stuff in memory so it won't
|
|
- * know about this change. Close it so that it gets reloaded next search.
|
|
+ * The searcher's index reader has it's stuff in memory so it won't know about this
|
|
+ * change. Close it so that it gets reloaded next search.
|
|
*/
|
|
if (searcher != null) {
|
|
- searcher.close();
|
|
+ searcher.getIndexReader().close();
|
|
searcher = null;
|
|
}
|
|
return true;
|
|
@@ -407,11 +421,11 @@
|
|
saveDependencies();
|
|
|
|
/*
|
|
- * The searcher's index reader has it's stuff in memory so it won't
|
|
- * know about this change. Close it so that it gets reloaded next search.
|
|
+ * The searcher's index reader has it's stuff in memory so it won't know about this
|
|
+ * change. Close it so that it gets reloaded next search.
|
|
*/
|
|
if (searcher != null) {
|
|
- searcher.close();
|
|
+ searcher.getIndexReader().close();
|
|
searcher = null;
|
|
}
|
|
return true;
|
|
@@ -469,11 +483,11 @@
|
|
String indexId = indexIds.get(i);
|
|
String indexPath = indexPaths.get(i);
|
|
try {
|
|
- dirList.add(new NIOFSDirectory(new File(indexPath)));
|
|
+ dirList.add(new NIOFSDirectory(new File(indexPath).toPath()));
|
|
} catch (IOException ioe) {
|
|
- HelpBasePlugin
|
|
- .logError(
|
|
- "Help search indexing directory could not be created for directory " + indexPath, ioe); //$NON-NLS-1$
|
|
+ HelpBasePlugin.logError(
|
|
+ "Help search indexing directory could not be created for directory " + indexPath, //$NON-NLS-1$
|
|
+ ioe);
|
|
continue;
|
|
}
|
|
|
|
@@ -525,18 +539,19 @@
|
|
}
|
|
|
|
public IStatus removeDuplicates(String name, String[] index_paths) {
|
|
- TermDocs hrefDocs = null;
|
|
- TermDocs indexDocs = null;
|
|
- Term hrefTerm = new Term(FIELD_NAME, name);
|
|
try {
|
|
+ LeafReader ar = SlowCompositeReaderWrapper.wrap(ir);
|
|
+ PostingsEnum hrefDocs = null;
|
|
+ PostingsEnum indexDocs = null;
|
|
+ Term hrefTerm = new Term(FIELD_NAME, name);
|
|
for (int i = 0; i < index_paths.length; i++) {
|
|
Term indexTerm = new Term(FIELD_INDEX_ID, index_paths[i]);
|
|
if (i == 0) {
|
|
- hrefDocs = ir.termDocs(hrefTerm);
|
|
- indexDocs = ir.termDocs(indexTerm);
|
|
+ hrefDocs = ar.postings(hrefTerm);
|
|
+ indexDocs = ar.postings(indexTerm);
|
|
} else {
|
|
- hrefDocs.seek(hrefTerm);
|
|
- indexDocs.seek(indexTerm);
|
|
+ hrefDocs = ar.postings(hrefTerm);
|
|
+ indexDocs = ar.postings(indexTerm);
|
|
}
|
|
removeDocuments(hrefDocs, indexDocs);
|
|
}
|
|
@@ -545,19 +560,6 @@
|
|
"IO exception occurred while removing duplicates of document " + name //$NON-NLS-1$
|
|
+ " from index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$
|
|
ioe);
|
|
- } finally {
|
|
- if (hrefDocs != null) {
|
|
- try {
|
|
- hrefDocs.close();
|
|
- } catch (IOException e) {
|
|
- }
|
|
- }
|
|
- if (indexDocs != null) {
|
|
- try {
|
|
- indexDocs.close();
|
|
- } catch (IOException e) {
|
|
- }
|
|
- }
|
|
}
|
|
return Status.OK_STATUS;
|
|
}
|
|
@@ -569,33 +571,33 @@
|
|
* @param docs2
|
|
* @throws IOException
|
|
*/
|
|
- private void removeDocuments(TermDocs doc1, TermDocs docs2) throws IOException {
|
|
- if (!doc1.next()) {
|
|
+ private void removeDocuments(PostingsEnum doc1, PostingsEnum docs2) throws IOException {
|
|
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
- if (!docs2.next()) {
|
|
+ if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
while (true) {
|
|
- if (doc1.doc() < docs2.doc()) {
|
|
- if (!doc1.skipTo(docs2.doc())) {
|
|
- if (!doc1.next()) {
|
|
+ if (doc1.docID() < docs2.docID()) {
|
|
+ if (doc1.advance(docs2.docID()) == PostingsEnum.NO_MORE_DOCS) {
|
|
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
}
|
|
- } else if (doc1.doc() > docs2.doc()) {
|
|
- if (!docs2.skipTo(doc1.doc())) {
|
|
- if (!doc1.next()) {
|
|
+ } else if (doc1.docID() > docs2.docID()) {
|
|
+ if (docs2.advance(doc1.docID()) == PostingsEnum.NO_MORE_DOCS) {
|
|
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
- if (doc1.doc() == docs2.doc()) {
|
|
- ir.deleteDocument(doc1.doc());
|
|
- if (!doc1.next()) {
|
|
+ if (doc1.docID() == docs2.docID()) {
|
|
+ iw.tryDeleteDocument(ir, doc1.docID());
|
|
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
- if (!docs2.next()) {
|
|
+ if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
|
|
return;
|
|
}
|
|
}
|
|
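
The rewritten removeDocuments() leans on the DocIdSetIterator contract that PostingsEnum inherits: nextDoc() steps to the next matching document, advance(target) jumps to the first document at or beyond target, and both return NO_MORE_DOCS once the list is exhausted; the deletion itself goes through IndexWriter.tryDeleteDocument(reader, docID), since reader-side deletes are gone. A condensed sketch of deleting the documents two postings lists have in common, assuming the writer and reader are already open as in the surrounding class:

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.search.DocIdSetIterator;

    class IntersectDeleteSketch {
        static void deleteCommonDocs(IndexWriter writer, IndexReader reader,
                PostingsEnum a, PostingsEnum b) throws IOException {
            int docA = a.nextDoc();
            int docB = b.nextDoc();
            while (docA != DocIdSetIterator.NO_MORE_DOCS && docB != DocIdSetIterator.NO_MORE_DOCS) {
                if (docA < docB) {
                    docA = a.advance(docB);                 // skip ahead to the first doc >= docB
                } else if (docB < docA) {
                    docB = b.advance(docA);
                } else {
                    writer.tryDeleteDocument(reader, docA); // same doc in both lists: a duplicate
                    docA = a.nextDoc();
                    docB = b.nextDoc();
                }
            }
        }
    }
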
@@ -623,20 +625,72 @@
|
|
registerSearch(Thread.currentThread());
|
|
if (closed)
|
|
return;
|
|
- QueryBuilder queryBuilder = new QueryBuilder(searchQuery.getSearchWord(), analyzerDescriptor);
|
|
- Query luceneQuery = queryBuilder.getLuceneQuery(searchQuery.getFieldNames(), searchQuery
|
|
- .isFieldSearch());
|
|
- if (HelpPlugin.DEBUG_SEARCH) {
|
|
- System.out.println("Search Query: " + luceneQuery.toString()); //$NON-NLS-1$
|
|
+
|
|
+ String[] fields;
|
|
+ if (searchQuery.isFieldSearch()){
|
|
+ // sometimes you might want to search fields other than the defaults
|
|
+ fields = (String[]) searchQuery.getFieldNames().toArray(new String[0]);
|
|
+ }else {
|
|
+ fields = new String[]{"contents","title"}; //$NON-NLS-1$ //$NON-NLS-2$
|
|
}
|
|
- String highlightTerms = queryBuilder.gethighlightTerms();
|
|
- if (luceneQuery != null) {
|
|
+
|
|
+ //prepare the parser
|
|
+
|
|
+ MultiFieldQueryParser qb = new MultiFieldQueryParser(fields,analyzerDescriptor.getAnalyzer());
|
|
+ qb.setAllowLeadingWildcard(true);
|
|
+ qb.setAnalyzeRangeTerms(true);
|
|
+ qb.setAutoGeneratePhraseQueries(true);
|
|
+ qb.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
|
+ qb.setLowercaseExpandedTerms(true);
|
|
+ qb.setLocale(new Locale(analyzerDescriptor.getLang()));
|
|
+
|
|
+ //parse the "pure" query (no boosting)
|
|
+ Query luceneQuery = qb.parse(searchQuery.getSearchWord());
|
|
+
|
|
+ // we'll merge the pure query with some boosted queries
|
|
+ Query mergedQuery;
|
|
+
|
|
+ if (!isWildcardQuery(searchQuery.getSearchWord())){
|
|
+ mergedQuery = new BooleanQuery(); //merge for all fields before merging with luceneQuery
|
|
+ for (int i=0;i<fields.length;i++){
|
|
+ Query exactBoostQuery = qb.createPhraseQuery(fields[i], searchQuery.getSearchWord());
|
|
+ exactBoostQuery.setBoost(10.0f);
|
|
+ ((BooleanQuery) mergedQuery).add(exactBoostQuery,Occur.SHOULD);
|
|
+ }
|
|
+ ((BooleanQuery) mergedQuery).add(luceneQuery,Occur.SHOULD);
|
|
+
|
|
+ }else {
|
|
+ mergedQuery = luceneQuery;
|
|
+ }
|
|
+
|
|
+ Set<String> set = new HashSet<String>();
|
|
+
|
|
+ if (mergedQuery != null) {
|
|
if (searcher == null) {
|
|
openSearcher();
|
|
}
|
|
- TopDocs topDocs = searcher.search(luceneQuery, null, 1000);
|
|
- collector.addHits(LocalSearchManager.asList(topDocs, searcher), highlightTerms);
|
|
+ TopDocs topDocs = searcher.search(mergedQuery, 1000);
|
|
+
|
|
+ String highlight=null;
|
|
+ QueryParserTokenManager manager = new QueryParserTokenManager(new FastCharStream(new StringReader(searchQuery.getSearchWord())));
|
|
+ while (true){
|
|
+ Token nextToken = manager.getNextToken();
|
|
+ String toHighlight = null;
|
|
+ if (nextToken.kind==0) break;
|
|
+ String image = nextToken.image;
|
|
+ toHighlight=image;
|
|
+ if ((image.startsWith("\""))&&( image.endsWith("\""))){ //$NON-NLS-1$//$NON-NLS-2$
|
|
+ toHighlight = image.substring(1,image.length()-1);
|
|
+ }
|
|
+ if (image.equals("AND") || image.equals("OR")) //$NON-NLS-1$ //$NON-NLS-2$
|
|
+ continue;
|
|
+ set.add(toHighlight);
|
|
+
|
|
+ }
|
|
+ highlight = buildHighlight(set);
|
|
+ collector.addHits(LocalSearchManager.asList(topDocs, searcher), highlight==null?"":highlight); //$NON-NLS-1$
|
|
}
|
|
+
|
|
} catch (BooleanQuery.TooManyClauses tmc) {
|
|
collector.addQTCException(new QueryTooComplexException());
|
|
} catch (QueryTooComplexException qe) {
|
|
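
The help system's own QueryBuilder/getLuceneQuery pair is retired here in favour of the stock MultiFieldQueryParser from org.apache.lucene.queryparser.classic, with exact phrase matches boosted above the parsed query in a hand-built BooleanQuery. A stripped-down sketch of that combination; the field names and the 10.0f boost come from the hunk, while StandardAnalyzer stands in for the locale-specific analyzer:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.Query;

    class HelpQuerySketch {
        static Query buildQuery(String searchWord) throws ParseException {
            String[] fields = { "contents", "title" };
            MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
            parser.setAllowLeadingWildcard(true);
            Query parsed = parser.parse(searchWord);

            BooleanQuery merged = new BooleanQuery();
            for (String field : fields) {
                Query phrase = parser.createPhraseQuery(field, searchWord);
                if (phrase != null) {           // null when the analyzer drops every token
                    phrase.setBoost(10.0f);     // rank exact phrase hits first
                    merged.add(phrase, Occur.SHOULD);
                }
            }
            merged.add(parsed, Occur.SHOULD);
            return merged;
        }
    }

Boosting exact phrases rather than relying on the parser alone keeps literal matches at the top of the hit list while still returning looser term matches further down.
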
@@ -647,6 +701,19 @@
|
|
} finally {
|
|
unregisterSearch(Thread.currentThread());
|
|
}
|
|
+ }
|
|
+
|
|
+ private boolean isWildcardQuery(String searchWord) {
|
|
+
|
|
+ return searchWord.contains("?")|| searchWord.contains("*"); //$NON-NLS-1$//$NON-NLS-2$
|
|
+ }
|
|
+
|
|
+ private String buildHighlight(Set<String> set) {
|
|
+ StringBuilder sb = new StringBuilder();
|
|
+ for (String string : set) {
|
|
+ sb.append("\""+string+"\""); //$NON-NLS-1$//$NON-NLS-2$
|
|
+ }
|
|
+ return sb.toString();
|
|
}
|
|
|
|
public String getLocale() {
|
|
@@ -660,13 +727,13 @@
|
|
if (docPlugins == null) {
|
|
Set<String> totalIds = new HashSet<String>();
|
|
IExtensionRegistry registry = Platform.getExtensionRegistry();
|
|
- IExtensionPoint extensionPoint = registry.getExtensionPoint(TocFileProvider.EXTENSION_POINT_ID_TOC);
|
|
+ IExtensionPoint extensionPoint = registry
|
|
+ .getExtensionPoint(TocFileProvider.EXTENSION_POINT_ID_TOC);
|
|
IExtension[] extensions = extensionPoint.getExtensions();
|
|
- for (int i=0;i<extensions.length;++i) {
|
|
+ for (int i = 0; i < extensions.length; ++i) {
|
|
try {
|
|
totalIds.add(extensions[i].getNamespaceIdentifier());
|
|
- }
|
|
- catch (InvalidRegistryObjectException e) {
|
|
+ } catch (InvalidRegistryObjectException e) {
|
|
// ignore this extension and move on
|
|
}
|
|
}
|
|
@@ -717,25 +784,27 @@
|
|
|
|
/**
|
|
* Determines whether an index can be read by the Lucene bundle
|
|
- * @param indexVersionString The version of an Index directory
|
|
+ *
|
|
+ * @param indexVersionString
|
|
+ * The version of an Index directory
|
|
* @return
|
|
*/
|
|
public boolean isLuceneCompatible(String indexVersionString) {
|
|
- if (indexVersionString==null) return false;
|
|
+ if (indexVersionString == null)
|
|
+ return false;
|
|
String luceneVersionString = ""; //$NON-NLS-1$
|
|
Bundle luceneBundle = Platform.getBundle(LUCENE_BUNDLE_ID);
|
|
if (luceneBundle != null) {
|
|
- luceneVersionString += luceneBundle.getHeaders()
|
|
- .get(Constants.BUNDLE_VERSION);
|
|
+ luceneVersionString += luceneBundle.getHeaders().get(Constants.BUNDLE_VERSION);
|
|
}
|
|
Version luceneVersion = new Version(luceneVersionString);
|
|
Version indexVersion = new Version(indexVersionString);
|
|
- Version v191 = new Version(1, 9, 1);
|
|
- if (indexVersion.compareTo(v191) < 0) {
|
|
- // index is older than Lucene 1.9.1
|
|
+ Version v500 = new Version(5, 0, 0);
|
|
+ if (indexVersion.compareTo(v500) < 0) {
|
|
+ // index is older than Lucene 5.0.0
|
|
return false;
|
|
}
|
|
- if ( luceneVersion.compareTo(indexVersion) >= 0 ) {
|
|
+ if (luceneVersion.compareTo(indexVersion) >= 0) {
|
|
// Lucene bundle is newer than the index
|
|
return true;
|
|
}
|
|
@@ -802,7 +871,7 @@
|
|
public void openSearcher() throws IOException {
|
|
synchronized (searcherCreateLock) {
|
|
if (searcher == null) {
|
|
- searcher = new IndexSearcher(IndexReader.open(luceneDirectory, false));
|
|
+ searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
|
|
}
|
|
}
|
|
}
|
|
@@ -820,7 +889,7 @@
|
|
if (searches.isEmpty()) {
|
|
if (searcher != null) {
|
|
try {
|
|
- searcher.close();
|
|
+ searcher.getIndexReader().close();
|
|
} catch (IOException ioe) {
|
|
}
|
|
}
|
|
@@ -838,7 +907,8 @@
|
|
* Finds and unzips prebuild index specified in preferences
|
|
*/
|
|
private void unzipProductIndex() {
|
|
- String indexPluginId = Platform.getPreferencesService().getString(HelpBasePlugin.PLUGIN_ID, "productIndex", null, null); //$NON-NLS-1$
|
|
+ String indexPluginId = Platform.getPreferencesService().getString(HelpBasePlugin.PLUGIN_ID,
|
|
+ "productIndex", null, null); //$NON-NLS-1$
|
|
if (indexPluginId == null || indexPluginId.length() <= 0) {
|
|
return;
|
|
}
|
|
@@ -901,10 +971,11 @@
|
|
*/
|
|
private void cleanOldIndex() {
|
|
IndexWriter cleaner = null;
|
|
- LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 10000);
|
|
+ LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(),
|
|
+ 10000);
|
|
try {
|
|
- cleaner = new IndexWriter(luceneDirectory, new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer).setOpenMode(
|
|
- OpenMode.CREATE));
|
|
+ cleaner = new IndexWriter(luceneDirectory,
|
|
+ new IndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE));
|
|
} catch (IOException ioe) {
|
|
} finally {
|
|
try {
|
|
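
IndexWriterConfig lost its Version argument in Lucene 5 (the LUCENE_31 constant used before no longer exists); only the analyzer is passed and the open mode is set on the config, as cleanOldIndex() now does. A minimal sketch of recreating an index that way, assuming any analyzer and an already-open Directory:

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;

    class RecreateIndexSketch {
        static void recreate(Directory dir, Analyzer analyzer) throws IOException {
            IndexWriterConfig config = new IndexWriterConfig(analyzer)
                    .setOpenMode(OpenMode.CREATE);   // CREATE discards any existing segments
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                writer.commit();                     // leaves a freshly created, empty index behind
            }
        }
    }
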
@@ -957,7 +1028,7 @@
|
|
* if lock already obtained
|
|
*/
|
|
public synchronized boolean tryLock() throws OverlappingFileLockException {
|
|
- if ("none".equals(System.getProperty("osgi.locking"))) { //$NON-NLS-1$//$NON-NLS-2$
|
|
+ if ("none".equals(System.getProperty("osgi.locking"))) { //$NON-NLS-1$//$NON-NLS-2$
|
|
return true; // Act as if lock succeeded
|
|
}
|
|
if (lock != null) {
|
|
@@ -979,7 +1050,7 @@
|
|
lock = null;
|
|
logLockFailure(ioe);
|
|
}
|
|
- if ( raf != null ) {
|
|
+ if (raf != null) {
|
|
try {
|
|
raf.close();
|
|
} catch (IOException e) {
|
|
@@ -1025,9 +1096,9 @@
|
|
}
|
|
lock = null;
|
|
}
|
|
- if (raf != null ) {
|
|
+ if (raf != null) {
|
|
try {
|
|
- raf.close();
|
|
+ raf.close();
|
|
} catch (IOException ioe) {
|
|
}
|
|
raf = null;
|
|
@@ -1100,25 +1171,26 @@
|
|
|
|
public IStatus addDocument(String pluginId, String name, URL url, String id, Document doc) {
|
|
// try a registered participant for the file format
|
|
- SearchParticipant participant = BaseHelpSystem.getLocalSearchManager()
|
|
- .getParticipant(pluginId, name);
|
|
+ SearchParticipant participant = BaseHelpSystem.getLocalSearchManager().getParticipant(pluginId, name);
|
|
if (participant != null) {
|
|
try {
|
|
return participant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc));
|
|
- }
|
|
- catch (Throwable t) {
|
|
+ } catch (Throwable t) {
|
|
return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
|
|
"Error while adding document to search participant (addDocument()): " //$NON-NLS-1$
|
|
- + name + ", " + url + "for participant " + participant.getClass().getName(), t); //$NON-NLS-1$ //$NON-NLS-2$
|
|
+ + name + ", " + url + "for participant " + participant.getClass().getName(), //$NON-NLS-1$ //$NON-NLS-2$
|
|
+ t);
|
|
}
|
|
}
|
|
// default to html
|
|
- return htmlSearchParticipant.addDocument(this, pluginId, name, url, id, new LuceneSearchDocument(doc));
|
|
+ return htmlSearchParticipant.addDocument(this, pluginId, name, url, id,
|
|
+ new LuceneSearchDocument(doc));
|
|
}
|
|
|
|
- public IStatus addSearchableDocument(String pluginId, String name, URL url, String id, ISearchDocument doc) {
|
|
+ public IStatus addSearchableDocument(String pluginId, String name, URL url, String id,
|
|
+ ISearchDocument doc) {
|
|
// In the help system the only class that implements ISearchDocument is LuceneSearchDocument
|
|
- LuceneSearchDocument luceneDoc = (LuceneSearchDocument)doc;
|
|
+ LuceneSearchDocument luceneDoc = (LuceneSearchDocument) doc;
|
|
return addDocument(pluginId, name, url, id, luceneDoc.getDocument());
|
|
}
|
|
}
|
|
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
|
|
index d0a7bb7..444b66c 100644
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
|
|
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
|
|
@@ -1,5 +1,5 @@
|
|
/*******************************************************************************
|
|
- * Copyright (c) 2000, 2012 IBM Corporation and others.
|
|
+ * Copyright (c) 2000, 2015 IBM Corporation and others.
|
|
* All rights reserved. This program and the accompanying materials
|
|
* are made available under the terms of the Eclipse Public License v1.0
|
|
* which accompanies this distribution, and is available at
|
|
@@ -7,10 +7,9 @@
|
|
*
|
|
* Contributors:
|
|
* IBM Corporation - initial API and implementation
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
package org.eclipse.help.internal.search;
|
|
-
|
|
-import java.io.*;
|
|
|
|
import org.apache.lucene.analysis.*;
|
|
|
|
@@ -18,7 +17,7 @@
|
|
* Smart Analyzer. Chooses underlying implementation based on the field which
|
|
* text is analyzed.
|
|
*/
|
|
-public final class SmartAnalyzer extends Analyzer {
|
|
+public final class SmartAnalyzer extends AnalyzerWrapper {
|
|
Analyzer pluggedInAnalyzer;
|
|
Analyzer exactAnalyzer;
|
|
|
|
@@ -26,19 +25,19 @@
|
|
* Constructor for SmartAnalyzer.
|
|
*/
|
|
public SmartAnalyzer(String locale, Analyzer pluggedInAnalyzer) {
|
|
- super();
|
|
+ super(pluggedInAnalyzer.getReuseStrategy());
|
|
this.pluggedInAnalyzer = pluggedInAnalyzer;
|
|
this.exactAnalyzer = new DefaultAnalyzer(locale);
|
|
}
|
|
/**
|
|
- * Creates a TokenStream which tokenizes all the text in the provided
|
|
- * Reader. Delegates to DefaultAnalyzer when field used to search for exact
|
|
+ * Delegates to DefaultAnalyzer when field used to search for exact
|
|
* match, and to plugged-in analyzer for other fields.
|
|
*/
|
|
- public final TokenStream tokenStream(String fieldName, Reader reader) {
|
|
+ @Override
|
|
+ public final Analyzer getWrappedAnalyzer(String fieldName) {
|
|
if (fieldName != null && fieldName.startsWith("exact_")) { //$NON-NLS-1$
|
|
- return exactAnalyzer.tokenStream(fieldName, reader);
|
|
+ return exactAnalyzer;
|
|
}
|
|
- return pluggedInAnalyzer.tokenStream(fieldName, reader);
|
|
+ return pluggedInAnalyzer;
|
|
}
|
|
}
|
|
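
In Lucene 5 an analyzer that only dispatches to other analyzers per field extends AnalyzerWrapper instead of overriding tokenStream(String, Reader), which is now final on Analyzer; getWrappedAnalyzer(String) is the single required override, as the SmartAnalyzer rewrite above shows. A sketch of the same delegation pattern outside the help system, with both delegate analyzers as stand-ins:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.AnalyzerWrapper;
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;

    public final class PerFieldSwitchAnalyzer extends AnalyzerWrapper {
        private final Analyzer exact = new KeywordAnalyzer();    // stand-in for DefaultAnalyzer
        private final Analyzer general = new StandardAnalyzer(); // stand-in for the plugged-in analyzer

        public PerFieldSwitchAnalyzer() {
            super(PER_FIELD_REUSE_STRATEGY);
        }

        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            // route "exact_*" fields to the verbatim analyzer, everything else to the general one
            return fieldName != null && fieldName.startsWith("exact_") ? exact : general;
        }
    }
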
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
|
|
index 0b70cf7..79d5592 100644
|
|
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
|
|
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
|
|
@@ -1,5 +1,5 @@
|
|
/*******************************************************************************
|
|
- * Copyright (c) 2000, 2012 IBM Corporation and others.
|
|
+ * Copyright (c) 2000, 2015 IBM Corporation and others.
|
|
* All rights reserved. This program and the accompanying materials
|
|
* are made available under the terms of the Eclipse Public License v1.0
|
|
* which accompanies this distribution, and is available at
|
|
@@ -7,16 +7,18 @@
|
|
*
|
|
* Contributors:
|
|
* IBM Corporation - initial API and implementation
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
package org.eclipse.help.internal.search;
|
|
|
|
-import com.ibm.icu.text.BreakIterator;
|
|
import java.io.IOException;
|
|
-import java.io.Reader;
|
|
import java.util.Locale;
|
|
+
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
import org.apache.lucene.analysis.Tokenizer;
|
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
+
|
|
+import com.ibm.icu.text.BreakIterator;
|
|
|
|
/**
|
|
* WordTokenStream obtains tokens containing words appropriate for use with
|
|
@@ -24,7 +26,6 @@
|
|
*/
|
|
public final class WordTokenStream extends Tokenizer {
|
|
private static final int BUF_LEN = 4096;
|
|
- private final Reader reader;
|
|
private final BreakIterator boundary;
|
|
private StringBuffer strbuf;
|
|
|
|
@@ -34,8 +35,8 @@
|
|
/**
|
|
* Constructor
|
|
*/
|
|
- public WordTokenStream(String fieldName, Reader reader, Locale locale) {
|
|
- this.reader = reader;
|
|
+ public WordTokenStream(Locale locale) {
|
|
+ super();
|
|
boundary = BreakIterator.getWordInstance(locale);
|
|
|
|
}
|
|
@@ -52,9 +53,9 @@
|
|
if(strbuf == null) {
|
|
int available;
|
|
char[] cbuf = new char[BUF_LEN];
|
|
- while ((available = reader.read(cbuf)) <= 0) {
|
|
+ while ((available = input.read(cbuf)) <= 0) {
|
|
if (available < 0) {
|
|
- reader.close();
|
|
+ input.close();
|
|
return false;
|
|
}
|
|
}
|
|
@@ -62,7 +63,7 @@
|
|
strbuf.append(cbuf, 0, available);
|
|
// read more until white space (or EOF)
|
|
int c;
|
|
- while (0 <= (c = reader.read())) {
|
|
+ while (0 <= (c = input.read())) {
|
|
strbuf.append((char) c);
|
|
if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
|
|
break;
|
|
@@ -70,7 +71,7 @@
|
|
}
|
|
|
|
if (c < 0) {
|
|
- reader.close();
|
|
+ input.close();
|
|
}
|
|
|
|
boundary.setText(strbuf.toString());
|
|
@@ -105,9 +106,10 @@
|
|
}
|
|
|
|
public void close() throws IOException {
|
|
+ super.close();
|
|
/// Unlikely to be called as this is a reused
|
|
- if (this.reader != null) {
|
|
- this.reader.close();
|
|
+ if (this.input != null) {
|
|
+ this.input.close();
|
|
}
|
|
}
|
|
}
|
|
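
By Lucene 5 a Tokenizer no longer receives its Reader through the constructor; the framework injects it via setReader() and exposes it as the protected input field, which is why WordTokenStream drops its reader argument and reads from input instead. Consumers follow the usual TokenStream contract: setReader, reset, incrementToken in a loop, then end and close. A sketch of that contract using a stock tokenizer so it runs on its own; the patched WordTokenStream would be consumed the same way:

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class TokenizerContractSketch {
        public static void main(String[] args) throws IOException {
            Tokenizer tokenizer = new WhitespaceTokenizer();       // stand-in for WordTokenStream
            tokenizer.setReader(new StringReader("help search index"));
            CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
            tokenizer.reset();                                     // mandatory before the first incrementToken()
            while (tokenizer.incrementToken()) {
                System.out.println(term.toString());
            }
            tokenizer.end();
            tokenizer.close();
        }
    }
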
diff --git a/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml b/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml
|
|
index 8438100..174e92a 100644
|
|
--- a/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml
|
|
+++ b/org.eclipse.help.webapp/web-archive/org.eclipse.help.infocenter-feature/feature.xml
|
|
@@ -74,20 +74,6 @@
|
|
unpack="false"/>
|
|
|
|
<plugin
|
|
- id="org.apache.lucene.analysis"
|
|
- download-size="0"
|
|
- install-size="0"
|
|
- version="0.0.0"
|
|
- unpack="false"/>
|
|
-
|
|
- <plugin
|
|
- id="org.apache.lucene.core"
|
|
- download-size="0"
|
|
- install-size="0"
|
|
- version="0.0.0"
|
|
- unpack="false"/>
|
|
-
|
|
- <plugin
|
|
id="org.eclipse.core.contenttype"
|
|
download-size="0"
|
|
install-size="0"
|
|
@@ -227,4 +213,32 @@
|
|
version="0.0.0"
|
|
unpack="false"/>
|
|
|
|
+ <plugin
|
|
+ id="org.apache.lucene.analyzers-common"
|
|
+ download-size="0"
|
|
+ install-size="0"
|
|
+ version="0.0.0"
|
|
+ unpack="false"/>
|
|
+
|
|
+ <plugin
|
|
+ id="org.apache.lucene.analyzers-smartcn"
|
|
+ download-size="0"
|
|
+ install-size="0"
|
|
+ version="0.0.0"
|
|
+ unpack="false"/>
|
|
+
|
|
+ <plugin
|
|
+ id="org.apache.lucene.core"
|
|
+ download-size="0"
|
|
+ install-size="0"
|
|
+ version="0.0.0"
|
|
+ unpack="false"/>
|
|
+
|
|
+ <plugin
|
|
+ id="org.apache.lucene.queryparser"
|
|
+ download-size="0"
|
|
+ install-size="0"
|
|
+ version="0.0.0"
|
|
+ unpack="false"/>
|
|
+
|
|
</feature>
|
|
diff --git a/org.eclipse.ua.tests/META-INF/MANIFEST.MF b/org.eclipse.ua.tests/META-INF/MANIFEST.MF
|
|
index 6bcf9bc..23f8910 100644
|
|
--- a/org.eclipse.ua.tests/META-INF/MANIFEST.MF
|
|
+++ b/org.eclipse.ua.tests/META-INF/MANIFEST.MF
|
|
@@ -19,14 +19,13 @@
|
|
org.eclipse.ui.forms,
|
|
org.eclipse.ui.browser;bundle-version="3.2.300",
|
|
org.eclipse.equinox.jsp.jasper;bundle-version="1.0.200",
|
|
- org.eclipse.equinox.jsp.jasper.registry;bundle-version="1.0.100"
|
|
+ org.eclipse.equinox.jsp.jasper.registry;bundle-version="1.0.100",
|
|
+ org.apache.lucene.analyzers-common;bundle-version="5.1.0",
|
|
+ org.apache.lucene.core;bundle-version="5.1.0"
|
|
Bundle-ActivationPolicy: lazy
|
|
Bundle-Vendor: Eclipse.org
|
|
Import-Package: javax.servlet;version="3.1.0",
|
|
- javax.servlet.http;version="3.1.0",
|
|
- org.apache.lucene.index;core=split;version="[3.5.0,4.0.0)",
|
|
- org.apache.lucene.search;core=split;version="[3.5.0,4.0.0)",
|
|
- org.apache.lucene.store;core=split;version="[3.5.0,4.0.0)"
|
|
+ javax.servlet.http;version="3.1.0"
|
|
Bundle-RequiredExecutionEnvironment: JavaSE-1.6
|
|
Export-Package: org.eclipse.ua.tests,
|
|
org.eclipse.ua.tests.browser,
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock b/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock
|
|
new file mode 100644
|
|
index 0000000..e69de29
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index291/write.lock
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe
|
|
new file mode 100644
|
|
index 0000000..09ec2c9
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfe
|
|
Binary files differ
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs
|
|
new file mode 100644
|
|
index 0000000..3aa288a
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.cfs
|
|
Binary files differ
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si
|
|
new file mode 100644
|
|
index 0000000..d897bfc
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index510/_0.si
|
|
Binary files differ
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1 b/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1
|
|
new file mode 100644
|
|
index 0000000..4878901
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index510/segments_1
|
|
Binary files differ
|
|
diff --git a/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock b/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock
|
|
new file mode 100644
|
|
index 0000000..e69de29
|
|
--- /dev/null
|
|
+++ b/org.eclipse.ua.tests/data/help/searchindex/index510/write.lock
|
|
diff --git a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java
|
|
index 1dd4876..02b2e5b 100644
|
|
--- a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java
|
|
+++ b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/EncodedCharacterSearch.java
|
|
@@ -7,6 +7,7 @@
|
|
*
|
|
* Contributors:
|
|
* IBM Corporation - initial API and implementation
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
package org.eclipse.ua.tests.help.search;
|
|
|
|
@@ -43,7 +44,7 @@
|
|
|
|
public void testUtf8Hebrew() {
|
|
SearchTestUtils.searchOneLocale("\u05D0\u05B7\u05E1\u05B0\u05D8\u05B0\u05E8\u05D5\u05B9\u05E0\u05D5\u05B9\u05DE"
|
|
- + "\u05B0\u05D9\u05B8\u05D4) \u05DC\u05B4\u05E7\u05BC\u05D5\u05BC\u05D9 (\u05D9\u05E8\u05D7 \u05D0\u05D5 \u05E9\u05DE\u05E9", new String[] {"/org.eclipse.ua.tests/data/help/search/testnlUTF8.htm" }, "en");
|
|
+ + "\u05B0\u05D9\u05B8\u05D4\\) \u05DC\u05B4\u05E7\u05BC\u05D5\u05BC\u05D9 \\(\u05D9\u05E8\u05D7 \u05D0\u05D5 \u05E9\u05DE\u05E9", new String[] {"/org.eclipse.ua.tests/data/help/search/testnlUTF8.htm" }, "en");
|
|
}
|
|
|
|
|
|
diff --git a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java
|
|
index 640d4c9..33ae5ba 100644
|
|
--- a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java
|
|
+++ b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/PrebuiltIndexCompatibility.java
|
|
@@ -8,6 +8,7 @@
|
|
* Contributors:
|
|
* IBM Corporation - initial API and implementation
|
|
* Alexander Kurtakov - Bug 460787
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
|
|
package org.eclipse.ua.tests.help.search;
|
|
@@ -15,43 +16,38 @@
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.net.URL;
|
|
-import java.util.ArrayList;
|
|
|
|
-import junit.framework.Test;
|
|
-import junit.framework.TestCase;
|
|
-import junit.framework.TestSuite;
|
|
-
|
|
-import org.osgi.framework.Bundle;
|
|
-
|
|
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
import org.apache.lucene.index.CorruptIndexException;
|
|
-import org.apache.lucene.index.IndexReader;
|
|
+import org.apache.lucene.index.DirectoryReader;
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
import org.apache.lucene.search.Query;
|
|
import org.apache.lucene.search.TopDocs;
|
|
import org.apache.lucene.store.Directory;
|
|
import org.apache.lucene.store.NIOFSDirectory;
|
|
-
|
|
-import org.eclipse.help.internal.base.BaseHelpSystem;
|
|
-import org.eclipse.help.internal.search.AnalyzerDescriptor;
|
|
-import org.eclipse.help.internal.search.PluginIndex;
|
|
-import org.eclipse.help.internal.search.QueryBuilder;
|
|
-import org.eclipse.help.internal.search.SearchIndexWithIndexingProgress;
|
|
-import org.eclipse.ua.tests.plugin.UserAssistanceTestPlugin;
|
|
-
|
|
+import org.apache.lucene.util.QueryBuilder;
|
|
import org.eclipse.core.runtime.FileLocator;
|
|
import org.eclipse.core.runtime.NullProgressMonitor;
|
|
import org.eclipse.core.runtime.Path;
|
|
+import org.eclipse.help.internal.base.BaseHelpSystem;
|
|
+import org.eclipse.help.internal.search.PluginIndex;
|
|
+import org.eclipse.help.internal.search.SearchIndexWithIndexingProgress;
|
|
+import org.eclipse.ua.tests.plugin.UserAssistanceTestPlugin;
|
|
+import org.osgi.framework.Bundle;
|
|
+
|
|
+import junit.framework.Test;
|
|
+import junit.framework.TestCase;
|
|
+import junit.framework.TestSuite;
|
|
|
|
/**
|
|
- * Verify that older versions of the index can be read by this
|
|
- * version of Eclipse.
|
|
+ * Verify that older versions of the index can be read by this version of
|
|
+ * Eclipse.
|
|
*
|
|
- * How to maintain this test - if when upgrading to a new version
|
|
- * of Lucene one of the IndexReadable tests fails you need to
|
|
- * make the following changes:
|
|
- * 1. Change the corresponding Compatible() test to expect a result of false
|
|
- * 2. Comment out the failing test
|
|
- * 3. Change the help system to recognize that version of Lucene as being incompatible
|
|
+ * How to maintain this test - if when upgrading to a new version of Lucene one
|
|
+ * of the IndexReadable tests fails you need to make the following changes: 1.
|
|
+ * Change the corresponding Compatible() test to expect a result of false 2.
|
|
+ * Comment out the failing test 3. Change the help system to recognize that
|
|
+ * version of Lucene as being incompatible
|
|
*/
|
|
|
|
public class PrebuiltIndexCompatibility extends TestCase {
|
|
@@ -64,48 +60,33 @@
|
|
}
|
|
|
|
/**
|
|
- * Test index built with Lucene 1.9.1
|
|
- */
|
|
- public void test1_9_1_IndexReadable() throws Exception {
|
|
- checkReadable("data/help/searchindex/index191");
|
|
- }
|
|
-
|
|
- /**
|
|
- * Test index built with Lucene 2.9.1
|
|
- */
|
|
- public void test2_9_1_IndexReadable() throws Exception {
|
|
- checkReadable("data/help/searchindex/index291");
|
|
- }
|
|
-
|
|
- /**
|
|
** Test compatibility of Lucene 1.9.1 index with current Lucene
|
|
*/
|
|
- public void test1_9_1Compatible()
|
|
- {
|
|
- checkCompatible("data/help/searchindex/index191", true);
|
|
+ public void test1_9_1Compatible() {
|
|
+ checkCompatible("data/help/searchindex/index191", false);
|
|
}
|
|
-
|
|
+
|
|
/**
|
|
** Test compatibility of Lucene 2.9.1 index with current Lucene
|
|
*/
|
|
- public void test2_9_1Compatible()
|
|
- {
|
|
- checkCompatible("data/help/searchindex/index291", true);
|
|
+ public void test2_9_1Compatible() {
|
|
+ checkCompatible("data/help/searchindex/index291", false);
|
|
}
|
|
|
|
- public void test1_9_1LuceneCompatible()
|
|
- {
|
|
- checkLuceneCompatible("1.9.1", true);
|
|
+ public void test1_9_1LuceneCompatible() {
|
|
+ checkLuceneCompatible("1.9.1", false);
|
|
}
|
|
|
|
- public void test1_4_103NotLuceneCompatible()
|
|
- {
|
|
+ public void test1_4_103NotLuceneCompatible() {
|
|
checkLuceneCompatible("1.4.103", false);
|
|
}
|
|
|
|
- public void test2_9_1LuceneCompatible()
|
|
- {
|
|
- checkLuceneCompatible("2.9.1", true);
|
|
+ public void test2_9_1LuceneCompatible() {
|
|
+ checkLuceneCompatible("2.9.1", false);
|
|
+ }
|
|
+
|
|
+ public void test5_1_0LuceneCompatible() {
|
|
+ checkLuceneCompatible("5.1.0", true);
|
|
}
|
|
|
|
public void testPluginIndexEqualToItself() {
|
|
@@ -114,52 +95,59 @@
|
|
}
|
|
|
|
/**
|
|
- * Verify that if the paths and plugins are the same two PluginIndex objects are equal
|
|
+ * Verify that if the paths and plugins are the same two PluginIndex objects
|
|
+ * are equal
|
|
*/
|
|
public void testPluginIndexEquality() {
|
|
- PluginIndex index1a = createPluginIndex("data/help/searchindex/index191");
|
|
- PluginIndex index1b = createPluginIndex("data/help/searchindex/index191");
|
|
+ PluginIndex index1a = createPluginIndex("data/help/searchindex/index510");
|
|
+ PluginIndex index1b = createPluginIndex("data/help/searchindex/index510");
|
|
assertTrue(index1a.equals(index1b));
|
|
}
|
|
-
|
|
+
|
|
/**
|
|
- * Verify that if the paths and plugins are the same two PluginIndex objects are equal
|
|
+ * Verify that if the paths and plugins are the same two PluginIndex objects
|
|
+ * are equal
|
|
*/
|
|
public void testPluginIndexHash() {
|
|
PluginIndex index1a = createPluginIndex("data/help/searchindex/index191");
|
|
PluginIndex index1b = createPluginIndex("data/help/searchindex/index191");
|
|
assertEquals(index1a.hashCode(), index1b.hashCode());
|
|
}
|
|
-
|
|
+
|
|
/**
|
|
- * Verify that if the paths are different two PluginIndex objects are not equal
|
|
+ * Verify that if the paths are different two PluginIndex objects are not
|
|
+ * equal
|
|
*/
|
|
public void testPluginIndexInequality() {
|
|
PluginIndex index1 = createPluginIndex("data/help/searchindex/index191");
|
|
PluginIndex index2 = createPluginIndex("data/help/searchindex/index291");
|
|
assertFalse(index1.equals(index2));
|
|
}
|
|
+
|
|
+ public void test5_1_0IndexReadable() throws CorruptIndexException, IOException {
|
|
+ checkReadable("data/help/searchindex/index510");
|
|
+ }
|
|
|
|
- /*
|
|
- * Verifies that a prebuilt index can be searched
|
|
- */
|
|
- private void checkReadable(String indexPath) throws IOException,
|
|
- CorruptIndexException {
|
|
+ /*
|
|
+ * Verifies that a prebuilt index can be searched
|
|
+ */
|
|
+ private void checkReadable(String indexPath) throws IOException, CorruptIndexException {
|
|
Path path = new Path(indexPath);
|
|
- Bundle bundle = UserAssistanceTestPlugin.getDefault().getBundle();
|
|
+ Bundle bundle = UserAssistanceTestPlugin.getDefault().getBundle();
|
|
URL url = FileLocator.find(bundle, path, null);
|
|
URL resolved = FileLocator.resolve(url);
|
|
if ("file".equals(resolved.getProtocol())) { //$NON-NLS-1$
|
|
String filePath = resolved.getFile();
|
|
- QueryBuilder queryBuilder = new QueryBuilder("eclipse", new AnalyzerDescriptor("en-us"));
|
|
- Query luceneQuery = queryBuilder.getLuceneQuery(new ArrayList<String>() , false);
|
|
+ StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
|
|
+ QueryBuilder builder = new QueryBuilder(standardAnalyzer);
|
|
+ Query luceneQuery = builder.createBooleanQuery("contents", "eclipse");
|
|
Directory luceneDirectory = null;
|
|
IndexSearcher searcher = null;
|
|
try {
|
|
- luceneDirectory = new NIOFSDirectory(new File(filePath));
|
|
- searcher = new IndexSearcher(IndexReader.open(luceneDirectory, true));
|
|
+ luceneDirectory = new NIOFSDirectory(new File(filePath).toPath());
|
|
+ searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
|
|
TopDocs hits = searcher.search(luceneQuery, 500);
|
|
- assertEquals(hits.totalHits, 1);
|
|
+ assertEquals(1, hits.totalHits);
|
|
} finally {
|
|
if (luceneDirectory != null)
|
|
try {
|
|
@@ -167,13 +155,13 @@
|
|
} catch (IOException x) {
|
|
}
|
|
if (searcher != null)
|
|
- searcher.close();
|
|
+ searcher.getIndexReader().close();
|
|
}
|
|
} else {
|
|
fail("Cannot resolve to file protocol");
|
|
}
|
|
}
|
|
-
|
|
+
|
|
/*
|
|
* Tests the isCompatible method in PluginIndex
|
|
*/
|
|
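
The rewritten checkReadable() builds its query with org.apache.lucene.util.QueryBuilder, a small analyzer-aware helper unrelated to the help system's former class of the same name, and opens the prebuilt index through DirectoryReader. A condensed sketch of that read path; the "contents"/"eclipse" query and the 500-hit limit mirror the test, and the index path is whichever directory holds the prebuilt index:

    import java.nio.file.Paths;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.NIOFSDirectory;
    import org.apache.lucene.util.QueryBuilder;

    class PrebuiltIndexQuerySketch {
        static int countHits(String indexPath) throws Exception {
            QueryBuilder builder = new QueryBuilder(new StandardAnalyzer());
            Query query = builder.createBooleanQuery("contents", "eclipse");
            try (Directory dir = new NIOFSDirectory(Paths.get(indexPath));
                    DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TopDocs hits = searcher.search(query, 500);
                return hits.totalHits; // an int count in Lucene 5.1
            }
        }
    }
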
@@ -186,22 +174,20 @@
|
|
public PluginIndex createPluginIndex(String versionDirectory) {
|
|
PluginIndex pluginIndex;
|
|
SearchIndexWithIndexingProgress index = BaseHelpSystem.getLocalSearchManager().getIndex("en_us".toString());
|
|
- BaseHelpSystem.getLocalSearchManager().ensureIndexUpdated(
|
|
- new NullProgressMonitor(),
|
|
- index);
|
|
+ BaseHelpSystem.getLocalSearchManager().ensureIndexUpdated(new NullProgressMonitor(), index);
|
|
pluginIndex = new PluginIndex("org.eclipse.ua.tests", "data/help/searchindex/" + versionDirectory, index);
|
|
return pluginIndex;
|
|
}
|
|
-
|
|
+
|
|
/*
|
|
- * Tests the isLuceneCompatible method in SearchIndex
|
|
+ * Tests the isLuceneCompatible method in SearchIndex
|
|
*/
|
|
private void checkLuceneCompatible(String version, boolean expected) {
|
|
SearchIndexWithIndexingProgress index = BaseHelpSystem.getLocalSearchManager().getIndex("en_us".toString());
|
|
- BaseHelpSystem.getLocalSearchManager().ensureIndexUpdated(
|
|
- new NullProgressMonitor(),
|
|
- index);
|
|
+ BaseHelpSystem.getLocalSearchManager().ensureIndexUpdated(new NullProgressMonitor(), index);
|
|
assertEquals(expected, index.isLuceneCompatible(version));
|
|
}
|
|
|
|
+
|
|
+
|
|
}
|
|
diff --git a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java
|
|
index 223e42a..2e782c3 100644
|
|
--- a/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java
|
|
+++ b/org.eclipse.ua.tests/help/org/eclipse/ua/tests/help/search/SearchParticipantTest.java
|
|
@@ -1,5 +1,5 @@
|
|
/*******************************************************************************
|
|
- * Copyright (c) 2010 IBM Corporation and others.
|
|
+ * Copyright (c) 2010, 2015 IBM Corporation and others.
|
|
* All rights reserved. This program and the accompanying materials
|
|
* are made available under the terms of the Eclipse Public License v1.0
|
|
* which accompanies this distribution, and is available at
|
|
@@ -7,6 +7,7 @@
|
|
*
|
|
* Contributors:
|
|
* IBM Corporation - initial API and implementation
|
|
+ * Sopot Cela - Bug 466829 - Migration to Lucene 5
|
|
*******************************************************************************/
|
|
package org.eclipse.ua.tests.help.search;
|
|
|
|
@@ -40,7 +41,7 @@
|
|
}
|
|
|
|
public void testSearchUsingAndInSeparateDocs() {
|
|
- SearchTestUtils.searchAllLocales("jduehdye and olhoykk", new String[0]);
|
|
+ SearchTestUtils.searchAllLocales("jduehdye AND olhoykk", new String[0]);
|
|
}
|
|
|
|
public void testSearchExactMatch() {
|