31040e724c
languages
360 lines
12 KiB
Diff
360 lines
12 KiB
Diff
--- kdelibs/kdecore/sonnet/filter_p.h 2009/10/01 20:06:45 1030189
|
|
+++ kdelibs/kdecore/sonnet/filter_p.h 2009/11/29 23:10:00 1056377
|
|
@@ -22,6 +22,7 @@
|
|
#ifndef SONNET_FILTER_P_H
|
|
#define SONNET_FILTER_P_H
|
|
|
|
+#include <QtCore/QTextBoundaryFinder>
|
|
#include <QtCore/QString>
|
|
#include <kdecore_export.h>
|
|
|
|
@@ -89,7 +90,6 @@
|
|
void restart();
|
|
|
|
virtual Word nextWord() const;
|
|
- virtual Word previousWord() const;
|
|
virtual Word wordAtPosition( unsigned int pos ) const;
|
|
|
|
virtual void setCurrentPosition( int );
|
|
@@ -103,13 +103,12 @@
|
|
protected:
|
|
bool trySkipLinks() const;
|
|
bool ignore( const QString& word ) const;
|
|
- QChar skipToLetter( int &fromPosition ) const;
|
|
bool shouldBeSkipped( bool wordWasUppercase, bool wordWasRunTogether,
|
|
const QString& foundWord ) const;
|
|
|
|
protected:
|
|
- QString m_buffer;
|
|
- mutable int m_currentPosition;
|
|
+ QString m_buffer;
|
|
+ mutable QTextBoundaryFinder m_finder;
|
|
|
|
private:
|
|
class Private;
|
|
--- kdelibs/kdecore/sonnet/filter.cpp 2009/06/22 10:01:33 985167
|
|
+++ kdelibs/kdecore/sonnet/filter.cpp 2009/11/29 23:10:00 1056377
|
|
@@ -52,8 +52,7 @@
|
|
}
|
|
|
|
Filter::Filter()
|
|
- : m_currentPosition(0),
|
|
- d(new Private)
|
|
+ : d(new Private)
|
|
{
|
|
d->settings = 0;
|
|
}
|
|
@@ -75,13 +74,13 @@
|
|
|
|
void Filter::restart()
|
|
{
|
|
- m_currentPosition = 0;
|
|
+ m_finder.toStart();
|
|
}
|
|
|
|
void Filter::setBuffer( const QString& buffer )
|
|
{
|
|
- m_buffer = buffer;
|
|
- m_currentPosition = 0;
|
|
+ m_buffer = buffer;
|
|
+ m_finder = QTextBoundaryFinder(QTextBoundaryFinder::Word, m_buffer);
|
|
}
|
|
|
|
QString Filter::buffer() const
|
|
@@ -91,135 +90,110 @@
|
|
|
|
bool Filter::atEnd() const
|
|
{
|
|
- return m_currentPosition >= m_buffer.length();
|
|
+ return m_finder.position() >= m_buffer.length();
|
|
}
|
|
|
|
-Word Filter::nextWord() const
|
|
+static inline bool
|
|
+isSpaceOrPunct(const QString &str)
|
|
{
|
|
- QChar currentChar = skipToLetter( m_currentPosition );
|
|
-
|
|
- if ( m_currentPosition >= m_buffer.length() || currentChar.isNull() ) {
|
|
- return Filter::end();
|
|
- }
|
|
+ return (str.length() <= 1 && (str[0].isSpace() || str[0].isPunct()));
|
|
+}
|
|
|
|
- bool allUppercase = currentChar.category() & QChar::Letter_Uppercase;
|
|
- bool runTogether = false;
|
|
+static bool
|
|
+finderNextWord(QTextBoundaryFinder &finder, QString &word, int &bufferStart)
|
|
+{
|
|
+ QTextBoundaryFinder::BoundaryReasons boundary = finder.boundaryReasons();
|
|
+ int start = finder.position(), end = finder.position();
|
|
+ bool inWord = (boundary & QTextBoundaryFinder::StartWord) != 0;
|
|
|
|
- QString foundWord;
|
|
- int start = m_currentPosition;
|
|
+ while (finder.toNextBoundary() > 0) {
|
|
+ boundary = finder.boundaryReasons();
|
|
|
|
- // Loop through the chars of the word, until the current char is not a letter
|
|
- // anymore.
|
|
- // Include apostrophes in the word, but not when it is the first character,
|
|
- // as it might be used as 'quotes'.
|
|
- // This way, we'll pass contractions like "I've" to the spellchecker, and
|
|
- // only the word inside apostrophe-quotes, without the apostrophes.
|
|
- while ( currentChar.isLetter() ||
|
|
- ( currentChar == '\'' && start != m_currentPosition ) ) {
|
|
- if ( currentChar.category() & QChar::Letter_Lowercase )
|
|
- allUppercase = false;
|
|
-
|
|
- /* FIXME: this does not work for Hebrew for example
|
|
- //we consider run-together words as mixed-case words
|
|
- if ( !allUppercase &&
|
|
- currentChar.category() & QChar::Letter_Uppercase )
|
|
- runTogether = true;
|
|
- */
|
|
-
|
|
- foundWord += currentChar;
|
|
- //Test if currentPosition exists, otherwise go out
|
|
- if( (m_currentPosition + 1) >= m_buffer.length()) {
|
|
-
|
|
- // Remove apostrophes at the end of the word, it probably comes from
|
|
- // quoting with apostrophes.
|
|
- if ( foundWord.endsWith( '\'' ) )
|
|
- foundWord.chop( 1 );
|
|
-
|
|
- if ( shouldBeSkipped( allUppercase, runTogether, foundWord ) ) {
|
|
- ++m_currentPosition;
|
|
- return nextWord();
|
|
- }
|
|
- else {
|
|
- ++m_currentPosition;
|
|
- return Word( foundWord, start );
|
|
+ if ((boundary & QTextBoundaryFinder::EndWord) && inWord) {
|
|
+ end = finder.position();
|
|
+ QString str = finder.string().mid(start, end - start);
|
|
+ if (!isSpaceOrPunct(str)) {
|
|
+ word = str;
|
|
+ bufferStart = start;
|
|
+#if 0
|
|
+ qDebug()<< "Word at " << start<< " word = '"
|
|
+ << str << "', len = " << str.length();
|
|
+#endif
|
|
+ return true;
|
|
}
|
|
+ inWord = false;
|
|
+ }
|
|
+ if ((boundary & QTextBoundaryFinder::StartWord)) {
|
|
+ start = finder.position();
|
|
+ inWord = true;
|
|
}
|
|
- ++m_currentPosition;
|
|
- currentChar = m_buffer.at( m_currentPosition );
|
|
}
|
|
+ return false;
|
|
+}
|
|
|
|
- // Remove apostrophes at the end of the word, it probably comes from
|
|
- // quoting with apostrophes.
|
|
- if ( foundWord.endsWith( '\'' ) )
|
|
- foundWord.chop( 1 );
|
|
-
|
|
- if ( shouldBeSkipped( allUppercase, runTogether, foundWord ) )
|
|
- return nextWord();
|
|
- return Word( foundWord, start );
|
|
+static bool finderWordAt(QTextBoundaryFinder &finder,
|
|
+ int at,
|
|
+ QString &word, int &bufferStart)
|
|
+{
|
|
+ int oldPosition = finder.position();
|
|
+
|
|
+ finder.setPosition(at);
|
|
+ if (!finder.isAtBoundary() || (finder.boundaryReasons() & QTextBoundaryFinder::EndWord)) {
|
|
+ if (finder.toPreviousBoundary() <= 0) {
|
|
+ /* QTextBoundaryIterator doesn't consider start of the string
|
|
+ * a boundary so we need to rewind to the beginning to catch
|
|
+ * the first word */
|
|
+ if (at > 0 && finder.string().length() > 0) {
|
|
+ finder.toStart();
|
|
+ } else
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ bool ret = finderNextWord(finder, word, bufferStart);
|
|
+ finder.setPosition(oldPosition);
|
|
+ return ret;
|
|
}
|
|
|
|
-Word Filter::previousWord() const
|
|
+Word Filter::nextWord() const
|
|
{
|
|
- while ( !m_buffer.at( m_currentPosition ).isLetter() &&
|
|
- m_currentPosition != 0) {
|
|
- --m_currentPosition;
|
|
- }
|
|
+ QString foundWord;
|
|
+ int start;
|
|
+ bool allUppercase = false;
|
|
+ bool runTogether = false;
|
|
|
|
- if ( m_currentPosition == 0 ) {
|
|
+ if (!finderNextWord(m_finder, foundWord, start))
|
|
return Filter::end();
|
|
- }
|
|
-
|
|
- QString foundWord;
|
|
- int start = m_currentPosition;
|
|
- while ( m_buffer.at( start ).isLetter() ) {
|
|
- foundWord.prepend( m_buffer.at( m_currentPosition ) );
|
|
- --start;
|
|
- }
|
|
|
|
+ if ( shouldBeSkipped( allUppercase, runTogether, foundWord ) )
|
|
+ return nextWord();
|
|
return Word( foundWord, start );
|
|
}
|
|
|
|
Word Filter::wordAtPosition( unsigned int pos ) const
|
|
{
|
|
- if ( (int)pos > m_buffer.length() )
|
|
- return Filter::end();
|
|
-
|
|
- int currentPosition = pos - 1;
|
|
QString foundWord;
|
|
- while ( currentPosition >= 0 &&
|
|
- m_buffer.at( currentPosition ).isLetter() ) {
|
|
- foundWord.prepend( m_buffer.at( currentPosition ) );
|
|
- --currentPosition;
|
|
- }
|
|
-
|
|
- // currentPosition == 0 means the first char is not letter
|
|
- // currentPosition == -1 means we reached the beginning
|
|
- int start = (currentPosition < 0) ? 0 : ++currentPosition;
|
|
- currentPosition = pos ;
|
|
- if ( currentPosition < m_buffer.length() && m_buffer.at( currentPosition ).isLetter() ) {
|
|
- while ( m_buffer.at( currentPosition ).isLetter() ) {
|
|
- foundWord.append( m_buffer.at( currentPosition ) );
|
|
- ++currentPosition;
|
|
- }
|
|
- }
|
|
-
|
|
+ int start;
|
|
+ if (!finderWordAt(m_finder, pos, foundWord, start))
|
|
+ return Filter::end();
|
|
return Word( foundWord, start );
|
|
}
|
|
|
|
|
|
void Filter::setCurrentPosition( int i )
|
|
{
|
|
- m_currentPosition = i;
|
|
+ QString word;
|
|
+ int pos;
|
|
|
|
- //go back to the last word so that next word returns something
|
|
- //useful
|
|
- while ( m_buffer.at( m_currentPosition ).isLetter() && m_currentPosition > 0 )
|
|
- --m_currentPosition;
|
|
+ //to make sure we're at an reasonable word boundary
|
|
+ if (!finderWordAt(m_finder, i, word, pos)) {
|
|
+ return;
|
|
+ }
|
|
+ m_finder.setPosition(pos);
|
|
}
|
|
|
|
int Filter::currentPosition() const
|
|
{
|
|
- return m_currentPosition;
|
|
+ return m_finder.position();
|
|
}
|
|
|
|
void Filter::replace( const Word& w, const QString& newWord)
|
|
@@ -227,8 +201,10 @@
|
|
int oldLen = w.word.length();
|
|
|
|
//start spell checkin from the just correct word
|
|
- m_currentPosition = w.start;
|
|
m_buffer = m_buffer.replace( w.start, oldLen, newWord );
|
|
+ m_finder = QTextBoundaryFinder(QTextBoundaryFinder::Word,
|
|
+ m_buffer);
|
|
+ m_finder.setPosition(w.start);
|
|
}
|
|
|
|
QString Filter::context() const
|
|
@@ -236,12 +212,12 @@
|
|
int len = 60;
|
|
//we don't want the expression underneath casted to an unsigned int
|
|
//which would cause it to always evaluate to false
|
|
- int signedPosition = m_currentPosition;
|
|
+ int signedPosition = m_finder.position();
|
|
bool begin = (signedPosition - len/2)<=0;
|
|
|
|
|
|
QString buffer = m_buffer;
|
|
- Word word = wordAtPosition( m_currentPosition );
|
|
+ Word word = wordAtPosition( m_finder.position() );
|
|
buffer = buffer.replace( word.start, word.word.length(),
|
|
QString( "<b>%1</b>" ).arg( word.word ) );
|
|
|
|
@@ -251,7 +227,7 @@
|
|
.arg( buffer.mid( 0, len ) );
|
|
else
|
|
context = QString( "...%1..." )
|
|
- .arg( buffer.mid( m_currentPosition - 20, len ) );
|
|
+ .arg( buffer.mid( m_finder.position() - 20, len ) );
|
|
|
|
context.replace( '\n', ' ' );
|
|
|
|
@@ -260,24 +236,27 @@
|
|
|
|
bool Filter::trySkipLinks() const
|
|
{
|
|
- QChar currentChar = m_buffer.at( m_currentPosition );
|
|
+ QChar currentChar = m_buffer.at( m_finder.position() );
|
|
+ int currentPosition = m_finder.position();
|
|
|
|
int length = m_buffer.length();
|
|
//URL - if so skip
|
|
if ( currentChar == ':'
|
|
- && (m_currentPosition+1 < length)
|
|
- && (m_buffer.at( ++m_currentPosition ) == '/' || ( m_currentPosition + 1 ) >= length ) ) {
|
|
+ && (currentPosition+1 < length)
|
|
+ && (m_buffer.at( ++currentPosition ) == '/' || ( currentPosition + 1 ) >= length ) ) {
|
|
//in both cases url is considered finished at the first whitespace occurrence
|
|
//TODO hey, "http://en.wikipedia.org/wiki/Main Page" --Nick Shaforostoff
|
|
- while ( !m_buffer.at( m_currentPosition++ ).isSpace() && m_currentPosition < length )
|
|
+ while ( !m_buffer.at( currentPosition++ ).isSpace() && currentPosition < length )
|
|
;
|
|
+ m_finder.setPosition(currentPosition);
|
|
return true;
|
|
}
|
|
|
|
//Email - if so skip
|
|
if ( currentChar == '@') {
|
|
- while ( ++m_currentPosition < length && !m_buffer.at( m_currentPosition ).isSpace() )
|
|
+ while ( ++currentPosition < length && !m_buffer.at( currentPosition ).isSpace() )
|
|
;
|
|
+ m_finder.setPosition(currentPosition);
|
|
return true;
|
|
}
|
|
|
|
@@ -289,19 +268,6 @@
|
|
return d->settings && d->settings->ignore( word );
|
|
}
|
|
|
|
-QChar Filter::skipToLetter( int &fromPosition ) const
|
|
-{
|
|
- //if( m_buffer.isEmpty())
|
|
- if (fromPosition>=m_buffer.size())
|
|
- return QChar();
|
|
- QChar currentChar = m_buffer.at( fromPosition );
|
|
- while ( !currentChar.isLetter() &&
|
|
- (int)++fromPosition < m_buffer.length() ) {
|
|
- currentChar = m_buffer.at( fromPosition );
|
|
- }
|
|
- return currentChar;
|
|
-}
|
|
-
|
|
bool Filter::shouldBeSkipped( bool wordWasUppercase, bool wordWasRunTogether,
|
|
const QString& foundWord ) const
|
|
{
|