diff --git a/.gitignore b/.gitignore index b7804dd..3e892f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /tesseract_4.1.tar.gz /tesseract_4.1.1.tar.gz /tesseract_4.1.2.tar.gz +/tesseract_5.0.0.tar.gz diff --git a/R-tesseract.spec b/R-tesseract.spec index 535bf6c..c5e3535 100644 --- a/R-tesseract.spec +++ b/R-tesseract.spec @@ -2,11 +2,11 @@ %bcond_with network %global packname tesseract -%global packver 4.1.2 +%global packver 5.0.0 %global rlibdir %{_libdir}/R/library Name: R-%{packname} -Version: 4.1.2 +Version: 5.0.0 Release: %autorelease Summary: Open Source OCR Engine @@ -14,11 +14,6 @@ License: ASL 2.0 URL: https://CRAN.R-project.org/package=%{packname} Source0: https://cran.r-project.org/src/contrib/%{packname}_%{packver}.tar.gz -# Fix leptonica include -Patch0: R-tesseract_include.patch -# Add support for tesseract 5 -Patch1: R-tesseract_tesseract5.patch - # Here's the R view of the dependencies world: # Depends: # Imports: R-Rcpp >= 0.12.12, R-pdftools >= 1.5, R-curl, R-rappdirs, R-digest @@ -40,6 +35,10 @@ BuildRequires: R-tibble BuildRequires: R-rmarkdown BuildRequires: pkgconfig(lept) BuildRequires: pkgconfig(tesseract) +%if %{with network} +# For rmarkdown, because docs get built. +BuildRequires: glyphicons-halflings-fonts +%endif %description Bindings to 'Tesseract' : a diff --git a/R-tesseract_include.patch b/R-tesseract_include.patch deleted file mode 100644 index e71d5fe..0000000 --- a/R-tesseract_include.patch +++ /dev/null @@ -1,7 +0,0 @@ -diff -rupN --no-dereference a/tesseract/tools/test.cpp b/tesseract/tools/test.cpp ---- a/tesseract/tools/test.cpp 2020-03-30 00:30:33.000000000 +0200 -+++ b/tesseract/tools/test.cpp 2021-12-11 22:49:04.133669737 +0100 -@@ -1,2 +1,2 @@ - #include --#include -+#include diff --git a/R-tesseract_tesseract5.patch b/R-tesseract_tesseract5.patch deleted file mode 100644 index f12db7b..0000000 --- a/R-tesseract_tesseract5.patch +++ /dev/null @@ -1,86 +0,0 @@ -diff -rupN a/tesseract/src/tesseract.cpp b/tesseract/src/tesseract.cpp ---- a/tesseract/src/tesseract.cpp 2021-09-17 12:44:09.000000000 +0200 -+++ b/tesseract/src/tesseract.cpp 2021-12-14 11:41:43.435908296 +0100 -@@ -1,5 +1,7 @@ - #include "tesseract_types.h" -+#if TESSERACT_MAJOR_VERSION < 5 - #include -+#endif - - /* NB: libtesseract now insists that the engine is initiated in 'C' locale. - * We do this as exemplified in the example code in the libc manual: -@@ -42,7 +44,11 @@ Rcpp::List tesseract_config(){ - // [[Rcpp::export]] - TessPtr tesseract_engine_internal(Rcpp::CharacterVector datapath, Rcpp::CharacterVector language, Rcpp::CharacterVector confpaths, - Rcpp::CharacterVector opt_names, Rcpp::CharacterVector opt_values){ -+#if TESSERACT_MAJOR_VERSION < 5 - GenericVector params, values; -+#else -+ std::vector params, values; -+#endif - const char * path = NULL; - const char * lang = NULL; - char * configs[1000] = {0}; -@@ -88,7 +94,11 @@ TessPtr tesseract_engine_set_variable(Te - - // [[Rcpp::export]] - Rcpp::LogicalVector validate_params(Rcpp::CharacterVector params){ -+#if TESSERACT_MAJOR_VERSION < 5 - STRING str; -+#else -+ std::string str; -+#endif - tesseract::TessBaseAPI *api = make_analyze_api(); - Rcpp::LogicalVector out(params.length()); - for(int i = 0; i < params.length(); i++) -@@ -101,16 +111,30 @@ Rcpp::LogicalVector validate_params(Rcpp - // [[Rcpp::export]] - Rcpp::List engine_info_internal(TessPtr ptr){ - tesseract::TessBaseAPI * api = get_engine(ptr); -+#if TESSERACT_MAJOR_VERSION < 5 - GenericVector langs; -+#else -+ std::vector langs; -+#endif - api->GetAvailableLanguagesAsVector(&langs); - Rcpp::CharacterVector available = Rcpp::CharacterVector::create(); -+#if TESSERACT_MAJOR_VERSION < 5 - for(int i = 0; i < langs.length(); i++) - available.push_back(langs.get(i).string()); -+#else -+ for(int i = 0; i < langs.size(); i++) -+ available.push_back(langs[i]); -+#endif - langs.clear(); - api->GetLoadedLanguagesAsVector(&langs); - Rcpp::CharacterVector loaded = Rcpp::CharacterVector::create(); -+#if TESSERACT_MAJOR_VERSION < 5 - for(int i = 0; i < langs.length(); i++) - loaded.push_back(langs.get(i).string()); -+#else -+ for(int i = 0; i < langs.size(); i++) -+ loaded.push_back(langs[i]); -+#endif - return Rcpp::List::create( - #ifndef LEGACY_TESSERACT_API - Rcpp::_["datapath"] = api->GetDatapath(), -@@ -135,11 +159,19 @@ Rcpp::String print_params(std::string fi - - // [[Rcpp::export]] - Rcpp::CharacterVector get_param_values(TessPtr ptr, Rcpp::CharacterVector params){ -+#if TESSERACT_MAJOR_VERSION < 5 - STRING str; -+#else -+ std::string str; -+#endif - tesseract::TessBaseAPI * api = get_engine(ptr); - Rcpp::CharacterVector out(params.length()); - for(int i = 0; i < params.length(); i++) -+#if TESSERACT_MAJOR_VERSION < 5 - out[i] = api->GetVariableAsString(params.at(i), &str) ? Rcpp::String(str.string()) : NA_STRING; -+#else -+ out[i] = api->GetVariableAsString(params.at(i), &str) ? Rcpp::String(str) : NA_STRING; -+#endif - return out; - } - diff --git a/sources b/sources index a35d8fb..9cd7861 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (tesseract_4.1.2.tar.gz) = 617046b97122531e7ddb44efacb971973e3cff2b7d7214f39ed5379db819c2caeeb88b1b0aebfdd2e09a975f394356692c1e1f0951da51d89301d78b0a9d96e8 +SHA512 (tesseract_5.0.0.tar.gz) = eb29996d05d04fea8f784fa283660308f47b43d901d21c2d547ae2929eaf7771c2b998c9990a9bd4262b795013af55d8e5881cbb3aa6b5f5eb74150b913e35a0