321 lines
15 KiB
Diff
321 lines
15 KiB
Diff
diff -rupN gimagereader-3.3.1/gtk/src/Recognizer.cc gimagereader-3.3.1-new/gtk/src/Recognizer.cc
|
|
--- gimagereader-3.3.1/gtk/src/Recognizer.cc 2019-07-28 22:59:40.000000000 +0200
|
|
+++ gimagereader-3.3.1-new/gtk/src/Recognizer.cc 2021-12-14 12:07:11.113449181 +0100
|
|
@@ -29,8 +29,9 @@
|
|
#include <cstring>
|
|
#define USE_STD_NAMESPACE
|
|
#include <tesseract/baseapi.h>
|
|
-#include <tesseract/strngs.h>
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
#include <tesseract/genericvector.h>
|
|
+#endif
|
|
#include <tesseract/ocrclass.h>
|
|
#undef USE_STD_NAMESPACE
|
|
#include <unistd.h>
|
|
@@ -44,7 +45,11 @@
|
|
|
|
class Recognizer::ProgressMonitor : public MainWindow::ProgressMonitor {
|
|
public:
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
ETEXT_DESC desc;
|
|
+#else
|
|
+ tesseract::ETEXT_DESC desc;
|
|
+#endif
|
|
|
|
ProgressMonitor(int nPages) : MainWindow::ProgressMonitor(nPages) {
|
|
desc.progress = 0;
|
|
@@ -121,12 +126,20 @@ Recognizer::Recognizer(const Ui::MainWin
|
|
}
|
|
|
|
std::vector<Glib::ustring> Recognizer::getAvailableLanguages() const {
|
|
- tesseract::TessBaseAPI tess = initTesseract();
|
|
+ auto tess = initTesseract();
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
GenericVector<STRING> availLanguages;
|
|
- tess.GetAvailableLanguagesAsVector(&availLanguages);
|
|
+#else
|
|
+ std::vector<std::string> availLanguages;
|
|
+#endif
|
|
+ tess->GetAvailableLanguagesAsVector(&availLanguages);
|
|
std::vector<Glib::ustring> result;
|
|
for(int i = 0; i < availLanguages.size(); ++i) {
|
|
- result.push_back(availLanguages[i].string());
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
+ result.push_back(availLanguages[i].string());
|
|
+#else
|
|
+ result.push_back(availLanguages[i]);
|
|
+#endif
|
|
}
|
|
std::sort(result.begin(), result.end(), [](const Glib::ustring & s1, const Glib::ustring & s2) {
|
|
bool s1Script = s1.substr(0, 6) == "script" || s1.substr(0, 1) == s1.substr(0, 1).uppercase();
|
|
@@ -140,13 +153,13 @@ std::vector<Glib::ustring> Recognizer::g
|
|
return result;
|
|
}
|
|
|
|
-tesseract::TessBaseAPI Recognizer::initTesseract(const char* language, bool* ok) const {
|
|
+std::unique_ptr<tesseract::TessBaseAPI> Recognizer::initTesseract(const char* language, bool* ok) const {
|
|
// unfortunately tesseract creates deliberate segfaults when an error occurs
|
|
std::signal(SIGABRT, MainWindow::tesseractCrash);
|
|
std::string current = setlocale(LC_ALL, NULL);
|
|
setlocale(LC_ALL, "C");
|
|
- tesseract::TessBaseAPI tess;
|
|
- int ret = tess.Init(nullptr, language);
|
|
+ std::unique_ptr<tesseract::TessBaseAPI> tess(new tesseract::TessBaseAPI());
|
|
+ int ret = tess->Init(nullptr, language);
|
|
std::signal(SIGSEGV, MainWindow::signalHandler);
|
|
setlocale(LC_ALL, current.c_str());
|
|
|
|
@@ -442,17 +455,17 @@ void Recognizer::recognize(const std::ve
|
|
bool prependFile = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcefilename")->getValue();
|
|
bool prependPage = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcepage")->getValue();
|
|
bool ok = false;
|
|
- tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.c_str(), &ok);
|
|
+ auto tess = initTesseract(m_curLang.prefix.c_str(), &ok);
|
|
if(ok) {
|
|
Glib::ustring failed;
|
|
- tess.SetPageSegMode(static_cast<tesseract::PageSegMode>(m_currentPsmMode));
|
|
+ tess->SetPageSegMode(static_cast<tesseract::PageSegMode>(m_currentPsmMode));
|
|
if(ui.radioButtonWhitelist->get_active()) {
|
|
- tess.SetVariable("tessedit_char_whitelist", ui.entryWhitelist->get_text().c_str());
|
|
+ tess->SetVariable("tessedit_char_whitelist", ui.entryWhitelist->get_text().c_str());
|
|
}
|
|
if(ui.radioButtonBlacklist->get_active()) {
|
|
- tess.SetVariable("tessedit_char_blacklist", ui.entryBlacklist->get_text().c_str());
|
|
+ tess->SetVariable("tessedit_char_blacklist", ui.entryBlacklist->get_text().c_str());
|
|
}
|
|
- OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess);
|
|
+ OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(*tess);
|
|
ProgressMonitor monitor(pages.size());
|
|
MAIN->showProgress(&monitor);
|
|
Utils::busyTask([&] {
|
|
@@ -483,11 +496,11 @@ void Recognizer::recognize(const std::ve
|
|
readSessionData->prependFile = prependFile && (readSessionData->prependPage || newFile);
|
|
firstChunk = false;
|
|
newFile = false;
|
|
- tess.SetImage(image->get_data(), image->get_width(), image->get_height(), 4, image->get_stride());
|
|
- tess.SetSourceResolution(MAIN->getDisplayer()->getCurrentResolution());
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->SetImage(image->get_data(), image->get_width(), image->get_height(), 4, image->get_stride());
|
|
+ tess->SetSourceResolution(MAIN->getDisplayer()->getCurrentResolution());
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- MAIN->getOutputEditor()->read(tess, readSessionData);
|
|
+ MAIN->getOutputEditor()->read(*tess, readSessionData);
|
|
}
|
|
}
|
|
|
|
@@ -509,23 +522,23 @@ void Recognizer::recognize(const std::ve
|
|
|
|
bool Recognizer::recognizeImage(const Cairo::RefPtr<Cairo::ImageSurface>& img, OutputDestination dest) {
|
|
bool ok = false;
|
|
- tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.c_str(), &ok);
|
|
+ auto tess = initTesseract(m_curLang.prefix.c_str(), &ok);
|
|
if(!ok) {
|
|
Utils::message_dialog(Gtk::MESSAGE_ERROR, _("Recognition errors occurred"), _("Failed to initialize tesseract"));
|
|
return false;
|
|
}
|
|
- tess.SetImage(img->get_data(), img->get_width(), img->get_height(), 4, 4 * img->get_width());
|
|
+ tess->SetImage(img->get_data(), img->get_width(), img->get_height(), 4, 4 * img->get_width());
|
|
ProgressMonitor monitor(1);
|
|
MAIN->showProgress(&monitor);
|
|
if(dest == OutputDestination::Buffer) {
|
|
- OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess);
|
|
+ OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(*tess);
|
|
readSessionData->file = MAIN->getDisplayer()->getCurrentImage(readSessionData->page);
|
|
readSessionData->angle = MAIN->getDisplayer()->getCurrentAngle();
|
|
readSessionData->resolution = MAIN->getDisplayer()->getCurrentResolution();
|
|
Utils::busyTask([&] {
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- MAIN->getOutputEditor()->read(tess, readSessionData);
|
|
+ MAIN->getOutputEditor()->read(*tess, readSessionData);
|
|
}
|
|
return true;
|
|
}, _("Recognizing..."));
|
|
@@ -533,9 +546,9 @@ bool Recognizer::recognizeImage(const Ca
|
|
} else if(dest == OutputDestination::Clipboard) {
|
|
Glib::ustring output;
|
|
if(Utils::busyTask([&] {
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- char* text = tess.GetUTF8Text();
|
|
+ char* text = tess->GetUTF8Text();
|
|
output = text;
|
|
delete[] text;
|
|
return true;
|
|
diff -rupN gimagereader-3.3.1/gtk/src/Recognizer.hh gimagereader-3.3.1-new/gtk/src/Recognizer.hh
|
|
--- gimagereader-3.3.1/gtk/src/Recognizer.hh 2019-07-28 22:59:40.000000000 +0200
|
|
+++ gimagereader-3.3.1-new/gtk/src/Recognizer.hh 2021-12-14 12:01:32.366977606 +0100
|
|
@@ -74,7 +74,7 @@ private:
|
|
|
|
sigc::signal<void, Config::Lang> m_signal_languageChanged;
|
|
|
|
- tesseract::TessBaseAPI initTesseract(const char* language = nullptr, bool* ok = nullptr) const;
|
|
+ std::unique_ptr<tesseract::TessBaseAPI> initTesseract(const char* language = nullptr, bool* ok = nullptr) const;
|
|
void manageCharaterLists();
|
|
void recognizeButtonClicked();
|
|
void recognizeCurrentPage();
|
|
diff -rupN gimagereader-3.3.1/qt/src/Recognizer.cc gimagereader-3.3.1-new/qt/src/Recognizer.cc
|
|
--- gimagereader-3.3.1/qt/src/Recognizer.cc 2019-07-28 22:59:40.000000000 +0200
|
|
+++ gimagereader-3.3.1-new/qt/src/Recognizer.cc 2021-12-14 12:12:16.224217558 +0100
|
|
@@ -31,8 +31,10 @@
|
|
#define USE_STD_NAMESPACE
|
|
#include <tesseract/baseapi.h>
|
|
#include <tesseract/ocrclass.h>
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
#include <tesseract/strngs.h>
|
|
#include <tesseract/genericvector.h>
|
|
+#endif
|
|
#undef USE_STD_NAMESPACE
|
|
#include <QMouseEvent>
|
|
#include <unistd.h>
|
|
@@ -52,7 +54,11 @@
|
|
|
|
class Recognizer::ProgressMonitor : public MainWindow::ProgressMonitor {
|
|
public:
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
ETEXT_DESC desc;
|
|
+#else
|
|
+ tesseract::ETEXT_DESC desc;
|
|
+#endif
|
|
|
|
ProgressMonitor(int nPages) : MainWindow::ProgressMonitor(nPages) {
|
|
desc.progress = 0;
|
|
@@ -111,13 +117,21 @@ Recognizer::Recognizer(const UI_MainWind
|
|
}
|
|
|
|
QStringList Recognizer::getAvailableLanguages() const {
|
|
- tesseract::TessBaseAPI tess = initTesseract();
|
|
+ auto tess = initTesseract();
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
GenericVector<STRING> availLanguages;
|
|
- tess.GetAvailableLanguagesAsVector(&availLanguages);
|
|
+#else
|
|
+ std::vector<std::string> availLanguages;
|
|
+#endif
|
|
+ tess->GetAvailableLanguagesAsVector(&availLanguages);
|
|
QStringList result;
|
|
for(int i = 0; i < availLanguages.size(); ++i) {
|
|
- result.append(availLanguages[i].string());
|
|
- }
|
|
+#if TESSERACT_MAJOR_VERSION < 5
|
|
+ result.push_back(availLanguages[i].string());
|
|
+#else
|
|
+ result.push_back(QString::fromStdString(availLanguages[i]));
|
|
+#endif
|
|
+ }
|
|
qSort(result.begin(), result.end(), [](const QString & s1, const QString & s2) {
|
|
bool s1Script = s1.startsWith("script") || s1.left(1) == s1.left(1).toUpper();
|
|
bool s2Script = s2.startsWith("script") || s2.left(1) == s2.left(1).toUpper();
|
|
@@ -130,13 +144,13 @@ QStringList Recognizer::getAvailableLang
|
|
return result;
|
|
}
|
|
|
|
-tesseract::TessBaseAPI Recognizer::initTesseract(const char* language, bool* ok) const {
|
|
+std::unique_ptr<tesseract::TessBaseAPI> Recognizer::initTesseract(const char* language, bool* ok) const {
|
|
// unfortunately tesseract creates deliberate aborts when an error occurs
|
|
std::signal(SIGABRT, MainWindow::tesseractCrash);
|
|
QByteArray current = setlocale(LC_ALL, NULL);
|
|
setlocale(LC_ALL, "C");
|
|
- tesseract::TessBaseAPI tess;
|
|
- int ret = tess.Init(nullptr, language);
|
|
+ std::unique_ptr<tesseract::TessBaseAPI> tess(new tesseract::TessBaseAPI());
|
|
+ int ret = tess->Init(nullptr, language);
|
|
setlocale(LC_NUMERIC, current.constData());
|
|
|
|
if(ok) {
|
|
@@ -432,17 +446,17 @@ void Recognizer::recognize(const QList<i
|
|
bool prependFile = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcefilename")->getValue();
|
|
bool prependPage = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcepage")->getValue();
|
|
bool ok = false;
|
|
- tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok);
|
|
+ auto tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok);
|
|
if(ok) {
|
|
QString failed;
|
|
- tess.SetPageSegMode(static_cast<tesseract::PageSegMode>(m_psmCheckGroup->checkedAction()->data().toInt()));
|
|
+ tess->SetPageSegMode(static_cast<tesseract::PageSegMode>(m_psmCheckGroup->checkedAction()->data().toInt()));
|
|
if(m_charListDialogUi.radioButtonWhitelist->isChecked()) {
|
|
- tess.SetVariable("tessedit_char_whitelist", m_charListDialogUi.lineEditWhitelist->text().toLocal8Bit());
|
|
+ tess->SetVariable("tessedit_char_whitelist", m_charListDialogUi.lineEditWhitelist->text().toLocal8Bit());
|
|
}
|
|
if(m_charListDialogUi.radioButtonBlacklist->isChecked()) {
|
|
- tess.SetVariable("tessedit_char_blacklist", m_charListDialogUi.lineEditBlacklist->text().toLocal8Bit());
|
|
+ tess->SetVariable("tessedit_char_blacklist", m_charListDialogUi.lineEditBlacklist->text().toLocal8Bit());
|
|
}
|
|
- OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess);
|
|
+ OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(*tess);
|
|
ProgressMonitor monitor(pages.size());
|
|
MAIN->showProgress(&monitor);
|
|
Utils::busyTask([&] {
|
|
@@ -474,11 +488,11 @@ void Recognizer::recognize(const QList<i
|
|
readSessionData->prependFile = prependFile && (readSessionData->prependPage || newFile);
|
|
firstChunk = false;
|
|
newFile = false;
|
|
- tess.SetImage(image.bits(), image.width(), image.height(), 4, image.bytesPerLine());
|
|
- tess.SetSourceResolution(MAIN->getDisplayer()->getCurrentResolution());
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->SetImage(image.bits(), image.width(), image.height(), 4, image.bytesPerLine());
|
|
+ tess->SetSourceResolution(MAIN->getDisplayer()->getCurrentResolution());
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- MAIN->getOutputEditor()->read(tess, readSessionData);
|
|
+ MAIN->getOutputEditor()->read(*tess, readSessionData);
|
|
}
|
|
}
|
|
QMetaObject::invokeMethod(MAIN, "popState", Qt::QueuedConnection);
|
|
@@ -499,23 +513,23 @@ void Recognizer::recognize(const QList<i
|
|
|
|
bool Recognizer::recognizeImage(const QImage& image, OutputDestination dest) {
|
|
bool ok = false;
|
|
- tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok);
|
|
+ auto tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok);
|
|
if(!ok) {
|
|
QMessageBox::critical(MAIN, _("Recognition errors occurred"), _("Failed to initialize tesseract"));
|
|
return false;
|
|
}
|
|
- tess.SetImage(image.bits(), image.width(), image.height(), 4, image.bytesPerLine());
|
|
+ tess->SetImage(image.bits(), image.width(), image.height(), 4, image.bytesPerLine());
|
|
ProgressMonitor monitor(1);
|
|
MAIN->showProgress(&monitor);
|
|
if(dest == OutputDestination::Buffer) {
|
|
- OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess);
|
|
+ OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(*tess);
|
|
readSessionData->file = MAIN->getDisplayer()->getCurrentImage(readSessionData->page);
|
|
readSessionData->angle = MAIN->getDisplayer()->getCurrentAngle();
|
|
readSessionData->resolution = MAIN->getDisplayer()->getCurrentResolution();
|
|
Utils::busyTask([&] {
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- MAIN->getOutputEditor()->read(tess, readSessionData);
|
|
+ MAIN->getOutputEditor()->read(*tess, readSessionData);
|
|
}
|
|
return true;
|
|
}, _("Recognizing..."));
|
|
@@ -523,9 +537,9 @@ bool Recognizer::recognizeImage(const QI
|
|
} else if(dest == OutputDestination::Clipboard) {
|
|
QString output;
|
|
if(Utils::busyTask([&] {
|
|
- tess.Recognize(&monitor.desc);
|
|
+ tess->Recognize(&monitor.desc);
|
|
if(!monitor.cancelled()) {
|
|
- char* text = tess.GetUTF8Text();
|
|
+ char* text = tess->GetUTF8Text();
|
|
output = QString::fromUtf8(text);
|
|
delete[] text;
|
|
return true;
|
|
diff -rupN gimagereader-3.3.1/qt/src/Recognizer.hh gimagereader-3.3.1-new/qt/src/Recognizer.hh
|
|
--- gimagereader-3.3.1/qt/src/Recognizer.hh 2019-07-28 22:59:40.000000000 +0200
|
|
+++ gimagereader-3.3.1-new/qt/src/Recognizer.hh 2021-12-14 12:05:50.527922131 +0100
|
|
@@ -79,7 +79,7 @@ private:
|
|
QString m_langLabel;
|
|
Config::Lang m_curLang;
|
|
|
|
- tesseract::TessBaseAPI initTesseract(const char* language = nullptr, bool* ok = nullptr) const;
|
|
+ std::unique_ptr<tesseract::TessBaseAPI> initTesseract(const char* language = nullptr, bool* ok = nullptr) const;
|
|
QList<int> selectPages(bool& autodetectLayout);
|
|
void recognize(const QList<int>& pages, bool autodetectLayout = false);
|
|
bool eventFilter(QObject* obj, QEvent* ev) override;
|