Update to latest version (#2028997)

This commit is contained in:
Elliott Sales de Andrade 2022-03-04 04:40:28 -05:00
parent d87e8adb0b
commit e82022a952
5 changed files with 8 additions and 101 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
/tesseract_4.1.tar.gz
/tesseract_4.1.1.tar.gz
/tesseract_4.1.2.tar.gz
/tesseract_5.0.0.tar.gz

View File

@@ -2,11 +2,11 @@
%bcond_with network
%global packname tesseract
%global packver 4.1.2
%global packver 5.0.0
%global rlibdir %{_libdir}/R/library
Name: R-%{packname}
Version: 4.1.2
Version: 5.0.0
Release: %autorelease
Summary: Open Source OCR Engine
@@ -14,11 +14,6 @@ License: ASL 2.0
URL: https://CRAN.R-project.org/package=%{packname}
Source0: https://cran.r-project.org/src/contrib/%{packname}_%{packver}.tar.gz
# Fix leptonica include
Patch0: R-tesseract_include.patch
# Add support for tesseract 5
Patch1: R-tesseract_tesseract5.patch
# Here's the R view of the dependencies world:
# Depends:
# Imports: R-Rcpp >= 0.12.12, R-pdftools >= 1.5, R-curl, R-rappdirs, R-digest
@@ -40,6 +35,10 @@ BuildRequires: R-tibble
BuildRequires: R-rmarkdown
BuildRequires: pkgconfig(lept)
BuildRequires: pkgconfig(tesseract)
%if %{with network}
# For rmarkdown, because docs get built.
BuildRequires: glyphicons-halflings-fonts
%endif
%description
Bindings to 'Tesseract' <https://opensource.google/projects/tesseract>: a

View File

@@ -1,7 +0,0 @@
diff -rupN --no-dereference a/tesseract/tools/test.cpp b/tesseract/tools/test.cpp
--- a/tesseract/tools/test.cpp 2020-03-30 00:30:33.000000000 +0200
+++ b/tesseract/tools/test.cpp 2021-12-11 22:49:04.133669737 +0100
@@ -1,2 +1,2 @@
#include <tesseract/baseapi.h>
-#include <allheaders.h>
+#include <leptonica/allheaders.h>

View File

@@ -1,86 +0,0 @@
diff -rupN a/tesseract/src/tesseract.cpp b/tesseract/src/tesseract.cpp
--- a/tesseract/src/tesseract.cpp 2021-09-17 12:44:09.000000000 +0200
+++ b/tesseract/src/tesseract.cpp 2021-12-14 11:41:43.435908296 +0100
@@ -1,5 +1,7 @@
#include "tesseract_types.h"
+#if TESSERACT_MAJOR_VERSION < 5
#include <tesseract/genericvector.h>
+#endif
/* NB: libtesseract now insists that the engine is initiated in 'C' locale.
* We do this as exemplified in the example code in the libc manual:
@@ -42,7 +44,11 @@ Rcpp::List tesseract_config(){
// [[Rcpp::export]]
TessPtr tesseract_engine_internal(Rcpp::CharacterVector datapath, Rcpp::CharacterVector language, Rcpp::CharacterVector confpaths,
Rcpp::CharacterVector opt_names, Rcpp::CharacterVector opt_values){
+#if TESSERACT_MAJOR_VERSION < 5
GenericVector<STRING> params, values;
+#else
+ std::vector<std::string> params, values;
+#endif
const char * path = NULL;
const char * lang = NULL;
char * configs[1000] = {0};
@@ -88,7 +94,11 @@ TessPtr tesseract_engine_set_variable(Te
// [[Rcpp::export]]
Rcpp::LogicalVector validate_params(Rcpp::CharacterVector params){
+#if TESSERACT_MAJOR_VERSION < 5
STRING str;
+#else
+ std::string str;
+#endif
tesseract::TessBaseAPI *api = make_analyze_api();
Rcpp::LogicalVector out(params.length());
for(int i = 0; i < params.length(); i++)
@@ -101,16 +111,30 @@ Rcpp::LogicalVector validate_params(Rcpp
// [[Rcpp::export]]
Rcpp::List engine_info_internal(TessPtr ptr){
tesseract::TessBaseAPI * api = get_engine(ptr);
+#if TESSERACT_MAJOR_VERSION < 5
GenericVector<STRING> langs;
+#else
+ std::vector<std::string> langs;
+#endif
api->GetAvailableLanguagesAsVector(&langs);
Rcpp::CharacterVector available = Rcpp::CharacterVector::create();
+#if TESSERACT_MAJOR_VERSION < 5
for(int i = 0; i < langs.length(); i++)
available.push_back(langs.get(i).string());
+#else
+ for(int i = 0; i < langs.size(); i++)
+ available.push_back(langs[i]);
+#endif
langs.clear();
api->GetLoadedLanguagesAsVector(&langs);
Rcpp::CharacterVector loaded = Rcpp::CharacterVector::create();
+#if TESSERACT_MAJOR_VERSION < 5
for(int i = 0; i < langs.length(); i++)
loaded.push_back(langs.get(i).string());
+#else
+ for(int i = 0; i < langs.size(); i++)
+ loaded.push_back(langs[i]);
+#endif
return Rcpp::List::create(
#ifndef LEGACY_TESSERACT_API
Rcpp::_["datapath"] = api->GetDatapath(),
@@ -135,11 +159,19 @@ Rcpp::String print_params(std::string fi
// [[Rcpp::export]]
Rcpp::CharacterVector get_param_values(TessPtr ptr, Rcpp::CharacterVector params){
+#if TESSERACT_MAJOR_VERSION < 5
STRING str;
+#else
+ std::string str;
+#endif
tesseract::TessBaseAPI * api = get_engine(ptr);
Rcpp::CharacterVector out(params.length());
for(int i = 0; i < params.length(); i++)
+#if TESSERACT_MAJOR_VERSION < 5
out[i] = api->GetVariableAsString(params.at(i), &str) ? Rcpp::String(str.string()) : NA_STRING;
+#else
+ out[i] = api->GetVariableAsString(params.at(i), &str) ? Rcpp::String(str) : NA_STRING;
+#endif
return out;
}

View File

@@ -1 +1 @@
SHA512 (tesseract_4.1.2.tar.gz) = 617046b97122531e7ddb44efacb971973e3cff2b7d7214f39ed5379db819c2caeeb88b1b0aebfdd2e09a975f394356692c1e1f0951da51d89301d78b0a9d96e8
SHA512 (tesseract_5.0.0.tar.gz) = eb29996d05d04fea8f784fa283660308f47b43d901d21c2d547ae2929eaf7771c2b998c9990a9bd4262b795013af55d8e5881cbb3aa6b5f5eb74150b913e35a0