From 0068f25f71cd7b675ce52d50744752e19e58bd6e Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Wed, 21 Oct 2009 11:14:42 +0000 Subject: [PATCH 01/24] Update to v2.04 --- .cvsignore | 3 +-- sources | 3 +-- tesseract.spec | 19 ++++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/.cvsignore b/.cvsignore index a79eb1a..5718bcb 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,2 +1 @@ -tesseract-2.03.tar.gz -tesseract-2.00.eng.tar.gz +tesseract-2.04.tar.gz diff --git a/sources b/sources index 2fbf4e7..ec0ec51 100644 --- a/sources +++ b/sources @@ -1,2 +1 @@ -5777b70b11df16c1ac9aa155d7cfc553 tesseract-2.03.tar.gz -b8291d6b3a63ce7879d688e845e341a9 tesseract-2.00.eng.tar.gz +b44eba1a9f4892ac62e484c807fe0533 tesseract-2.04.tar.gz diff --git a/tesseract.spec b/tesseract.spec index 93014c8..de59d6d 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract -Version: 2.03 -Release: 4%{?dist} +Version: 2.04 +Release: 1%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -15,6 +15,7 @@ BuildRequires: libtiff-devel Summary: Development files for %{name} Group: Development/Libraries Requires: %{name} = %{version}-%{release} +Provides: %{name}-static = %{version}-%{release} %description A commercial quality OCR engine originally developed at HP between 1985 and @@ -30,11 +31,7 @@ developing applications that use %{name}. %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* -find . 
-type f -exec sed -i 's/#include /#include \n#include /' {} \; ; -sed -i 's/#include /#include \n#include /' viewer/svmnode.cpp -sed -i 's/#include /#include \n#include \n#include /' viewer/svutil.cpp -sed -i 's/#include /#include \n#include /' viewer/scrollview.cpp -rm -f java/makefile +sed -i 's/#include /#include \n#include /' viewer/svutil.cpp %configure make %{?_smp_mflags} @@ -43,7 +40,6 @@ rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT mkdir -p $RPM_BUILD_ROOT%{_datadir}/tesseract mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/tesseract -rm -rf $RPM_BUILD_ROOT%{_libdir} rm $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata/{deu,fra,ita,nld,spa}* @@ -58,13 +54,18 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/wordlist2dawg %{_datadir}/%{name} -%doc AUTHORS ChangeLog COPYING NEWS phototest.tif README +%doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README %files devel %defattr(-,root,root,-) %{_includedir}/%{name} +%{_libdir}/lib%{name}* %changelog +* Wed Oct 21 2009 Karol Trzcionka - 2.04-1 +- Update to v2.04 +- Add static libraries to -devel subpackage + * Sun Jul 26 2009 Fedora Release Engineering - 2.03-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild From 3d69a6d1ea6738695d8bc70683d197e02d35ff23 Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Wed, 21 Oct 2009 11:16:23 +0000 Subject: [PATCH 02/24] Fix sources --- .cvsignore | 1 + sources | 1 + 2 files changed, 2 insertions(+) diff --git a/.cvsignore b/.cvsignore index 5718bcb..81f4911 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1,2 @@ tesseract-2.04.tar.gz +tesseract-2.00.eng.tar.gz diff --git a/sources b/sources index ec0ec51..f738c11 100644 --- a/sources +++ b/sources @@ -1 +1,2 @@ b44eba1a9f4892ac62e484c807fe0533 tesseract-2.04.tar.gz +b8291d6b3a63ce7879d688e845e341a9 tesseract-2.00.eng.tar.gz From 88f744d5248e9680fcafcab77521ffa6652020e1 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 25 Nov 2009 22:52:20 +0000 Subject: [PATCH 03/24] 
Fix typo that causes a failure to update the common directory. (releng #2781) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fddd865..603da57 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ # Makefile for source rpm: tesseract -# $Id$ +# $Id: Makefile,v 1.1 2007/06/11 03:33:03 kevin Exp $ NAME := tesseract SPECFILE = $(firstword $(wildcard *.spec)) define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done +for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done endef MAKEFILE_COMMON := $(shell $(find-makefile-common)) From 9fe33f1531d5747b10bfad19e13d0767b46461da Mon Sep 17 00:00:00 2001 From: Fedora Release Engineering Date: Thu, 29 Jul 2010 14:03:36 +0000 Subject: [PATCH 04/24] dist-git conversion --- .cvsignore => .gitignore | 0 Makefile | 21 --------------------- 2 files changed, 21 deletions(-) rename .cvsignore => .gitignore (100%) delete mode 100644 Makefile diff --git a/.cvsignore b/.gitignore similarity index 100% rename from .cvsignore rename to .gitignore diff --git a/Makefile b/Makefile deleted file mode 100644 index 603da57..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# Makefile for source rpm: tesseract -# $Id: Makefile,v 1.1 2007/06/11 03:33:03 kevin Exp $ -NAME := tesseract -SPECFILE = $(firstword $(wildcard *.spec)) - -define find-makefile-common -for d in common ../common ../../common ; do if [ -f $$d/Makefile.common ] ; then if [ -f $$d/CVS/Root -a -w $$d/Makefile.common ] ; then cd $$d ; cvs -Q update ; fi ; echo "$$d/Makefile.common" ; break ; fi ; done -endef - -MAKEFILE_COMMON := $(shell $(find-makefile-common)) - -ifeq 
($(MAKEFILE_COMMON),) -# attept a checkout -define checkout-makefile-common -test -f CVS/Root && { cvs -Q -d $$(cat CVS/Root) checkout common && echo "common/Makefile.common" ; } || { echo "ERROR: I can't figure out how to checkout the 'common' module." ; exit -1 ; } >&2 -endef - -MAKEFILE_COMMON := $(shell $(checkout-makefile-common)) -endif - -include $(MAKEFILE_COMMON) From bd519b6c08fdd52d70bce62f0ee9b819a86bfdd2 Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Tue, 16 Nov 2010 22:15:21 +0100 Subject: [PATCH 05/24] Update to v3.0.0 --- .gitignore | 2 ++ sources | 4 ++-- tesseract.spec | 30 ++++++++++++++++++++---------- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 81f4911..3e387cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ tesseract-2.04.tar.gz tesseract-2.00.eng.tar.gz +/tesseract-3.00.tar.gz +/eng.traineddata.gz diff --git a/sources b/sources index f738c11..f4f8670 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -b44eba1a9f4892ac62e484c807fe0533 tesseract-2.04.tar.gz -b8291d6b3a63ce7879d688e845e341a9 tesseract-2.00.eng.tar.gz +cc812a261088ea0c3d2da735be35d09f tesseract-3.00.tar.gz +d91041ad156cf2db36664e91ef799451 eng.traineddata.gz diff --git a/tesseract.spec b/tesseract.spec index de59d6d..8901ff6 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,5 +1,5 @@ Name: tesseract -Version: 2.04 +Version: 3.00 Release: 1%{?dist} Summary: Raw OCR Engine @@ -7,7 +7,7 @@ Group: Applications/File License: ASL 2.0 URL: http://code.google.com/p/tesseract-ocr/ Source0: http://tesseract-ocr.googlecode.com/files/%{name}-%{version}.tar.gz -Source1: http://tesseract-ocr.googlecode.com/files/%{name}-2.00.eng.tar.gz +Source1: http://tesseract-ocr.googlecode.com/files/eng.traineddata.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtiff-devel @@ -15,7 +15,6 @@ BuildRequires: libtiff-devel Summary: Development files for %{name} Group: Development/Libraries 
Requires: %{name} = %{version}-%{release} -Provides: %{name}-static = %{version}-%{release} %description A commercial quality OCR engine originally developed at HP between 1985 and @@ -27,41 +26,52 @@ The %{name}-devel package contains header file for developing applications that use %{name}. %prep -%setup -q -a 1 +%setup -q +gzip -dc %{SOURCE1} > eng.traineddata %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* -sed -i 's/#include /#include \n#include /' viewer/svutil.cpp -%configure +%configure --disable-static +sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool +sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool make %{?_smp_mflags} %install rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT +rm -f $RPM_BUILD_ROOT%{_libdir}/*la mkdir -p $RPM_BUILD_ROOT%{_datadir}/tesseract -mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/tesseract -rm $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata/{deu,fra,ita,nld,spa}* - +mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/%{name} +install -m 0644 eng.traineddata $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata %clean rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + %files %defattr(-,root,root,-) %{_bindir}/%{name} +%{_bindir}/combine_tessdata %{_bindir}/*training %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg %{_datadir}/%{name} +%{_libdir}/lib%{name}*.so.* %doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README %files devel %defattr(-,root,root,-) %{_includedir}/%{name} -%{_libdir}/lib%{name}* +%{_libdir}/lib%{name}*.so %changelog +* Tue Nov 16 2010 Karol Trzcionka - 3.00-1 +- Update to v3.00 +- Remove static libs and add dynamic + * Wed Oct 21 2009 Karol Trzcionka - 2.04-1 - Update to v2.04 - Add static libraries to -devel subpackage From 740927bc18e5b4fbcb2d51b9e933f9619b20a271 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Wed, 9 
Feb 2011 12:46:18 -0600 Subject: [PATCH 06/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index 8901ff6..4669ce5 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract Version: 3.00 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -68,6 +68,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Wed Feb 09 2011 Fedora Release Engineering - 3.00-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + * Tue Nov 16 2010 Karol Trzcionka - 3.00-1 - Update to v3.00 - Remove static libs and add dynamic From 348296b75898add9b89b2a9b26f4ed33bfd97556 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Sat, 14 Jan 2012 00:34:42 -0600 Subject: [PATCH 07/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index 4669ce5..ba5e504 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract Version: 3.00 -Release: 2%{?dist} +Release: 3%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -68,6 +68,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Sat Jan 14 2012 Fedora Release Engineering - 3.00-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + * Wed Feb 09 2011 Fedora Release Engineering - 3.00-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild From 4ee36f08d44e5c12783fec77a3912caccf73c10f Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Tue, 28 Feb 2012 14:21:25 -0600 Subject: [PATCH 08/24] - Rebuilt for c++ ABI breakage --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index ba5e504..32fb997 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ 
-1,6 +1,6 @@ Name: tesseract Version: 3.00 -Release: 3%{?dist} +Release: 4%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -68,6 +68,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Tue Feb 28 2012 Fedora Release Engineering - 3.00-4 +- Rebuilt for c++ ABI breakage + * Sat Jan 14 2012 Fedora Release Engineering - 3.00-3 - Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild From 6e8023589c5b1c323dda32532615396684c347b3 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Sat, 21 Jul 2012 18:15:28 -0500 Subject: [PATCH 09/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index 32fb997..d4c3e6c 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract Version: 3.00 -Release: 4%{?dist} +Release: 5%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -68,6 +68,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Sat Jul 21 2012 Fedora Release Engineering - 3.00-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + * Tue Feb 28 2012 Fedora Release Engineering - 3.00-4 - Rebuilt for c++ ABI breakage From 3d26c66db1bb4daa846b7d420b54b0a7617388f0 Mon Sep 17 00:00:00 2001 From: Kevin Kofler Date: Tue, 31 Jul 2012 23:35:21 +0200 Subject: [PATCH 10/24] Fix FTBFS with g++ 4.7 * Tue Jul 31 2012 Kevin Kofler - 3.00-6 - Fix FTBFS with g++ 4.7 --- tesseract-3.00-gcc47.patch | 11 +++++++++++ tesseract.spec | 8 +++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tesseract-3.00-gcc47.patch diff --git a/tesseract-3.00-gcc47.patch b/tesseract-3.00-gcc47.patch new file mode 100644 index 0000000..22e6c6d --- /dev/null +++ b/tesseract-3.00-gcc47.patch @@ -0,0 +1,11 @@ +diff -ur tesseract-3.00/viewer/svutil.cpp tesseract-3.00-gcc47/viewer/svutil.cpp +--- tesseract-3.00/viewer/svutil.cpp 2010-09-30 17:22:07.000000000 
+0200 ++++ tesseract-3.00-gcc47/viewer/svutil.cpp 2012-07-31 23:30:16.000000000 +0200 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #ifdef __linux__ diff --git a/tesseract.spec b/tesseract.spec index d4c3e6c..5eef6fe 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract Version: 3.00 -Release: 5%{?dist} +Release: 6%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -8,6 +8,8 @@ License: ASL 2.0 URL: http://code.google.com/p/tesseract-ocr/ Source0: http://tesseract-ocr.googlecode.com/files/%{name}-%{version}.tar.gz Source1: http://tesseract-ocr.googlecode.com/files/eng.traineddata.gz +# fix FTBFS with g++ 4.7 +Patch0: tesseract-3.00-gcc47.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtiff-devel @@ -28,6 +30,7 @@ developing applications that use %{name}. %prep %setup -q gzip -dc %{SOURCE1} > eng.traineddata +%patch0 -p1 -b .gcc47 %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* @@ -68,6 +71,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Tue Jul 31 2012 Kevin Kofler - 3.00-6 +- Fix FTBFS with g++ 4.7 + * Sat Jul 21 2012 Fedora Release Engineering - 3.00-5 - Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild From 0b725ccc42b734a612e69300dd4c29cbf876c162 Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Sat, 6 Oct 2012 23:36:44 +0200 Subject: [PATCH 11/24] Update to v3.01 --- .gitignore | 2 ++ sources | 4 ++-- tesseract.spec | 20 +++++++++++++++----- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 3e387cf..52e60b0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ tesseract-2.04.tar.gz tesseract-2.00.eng.tar.gz /tesseract-3.00.tar.gz /eng.traineddata.gz +/tesseract-3.01.tar.gz +/tesseract-ocr-3.01.eng.tar.gz diff --git a/sources b/sources index f4f8670..e5d8488 100644 --- a/sources +++ b/sources @@ 
-1,2 +1,2 @@ -cc812a261088ea0c3d2da735be35d09f tesseract-3.00.tar.gz -d91041ad156cf2db36664e91ef799451 eng.traineddata.gz +1ba496e51a42358fb9d3ffe781b2d20a tesseract-3.01.tar.gz +89c139a73e0e7b1225809fc7b226b6c9 tesseract-ocr-3.01.eng.tar.gz diff --git a/tesseract.spec b/tesseract.spec index 5eef6fe..e260b52 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,17 +1,19 @@ Name: tesseract -Version: 3.00 -Release: 6%{?dist} +Version: 3.01 +Release: 1%{?dist} Summary: Raw OCR Engine Group: Applications/File License: ASL 2.0 URL: http://code.google.com/p/tesseract-ocr/ Source0: http://tesseract-ocr.googlecode.com/files/%{name}-%{version}.tar.gz -Source1: http://tesseract-ocr.googlecode.com/files/eng.traineddata.gz +Source1: http://tesseract-ocr.googlecode.com/files/%{name}-ocr-3.01.eng.tar.gz # fix FTBFS with g++ 4.7 Patch0: tesseract-3.00-gcc47.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtiff-devel +BuildRequires: leptonica-devel +BuildRequires: automake,libtool %package devel Summary: Development files for %{name} @@ -29,11 +31,12 @@ developing applications that use %{name}. 
%prep %setup -q -gzip -dc %{SOURCE1} > eng.traineddata +gzip -dc %{SOURCE1} | tar -xf - %patch0 -p1 -b .gcc47 %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* +./autogen.sh %configure --disable-static sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool @@ -45,7 +48,7 @@ make install DESTDIR=$RPM_BUILD_ROOT rm -f $RPM_BUILD_ROOT%{_libdir}/*la mkdir -p $RPM_BUILD_ROOT%{_datadir}/tesseract mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/%{name} -install -m 0644 eng.traineddata $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata +install -m 0644 %{name}-ocr/tessdata/* $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata %clean rm -rf $RPM_BUILD_ROOT @@ -62,6 +65,8 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/wordlist2dawg %{_datadir}/%{name} %{_libdir}/lib%{name}*.so.* +%{_mandir}/man1/* +%{_mandir}/man5/* %doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README @@ -71,6 +76,11 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Sat Oct 06 2012 Karol Trzcionka - 3.01-1 +- Update to v3.01 +- Add manual pages +- Add BRs leptonica, automake + * Tue Jul 31 2012 Kevin Kofler - 3.00-6 - Fix FTBFS with g++ 4.7 From 83909c771e52bc22c382a8d4b6107ee1f9a9d0a7 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Thu, 14 Feb 2013 19:30:50 -0600 Subject: [PATCH 12/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index e260b52..51098ba 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,6 +1,6 @@ Name: tesseract Version: 3.01 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -76,6 +76,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %changelog +* Fri Feb 15 2013 Fedora Release Engineering - 3.01-2 +- Rebuilt for 
https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + * Sat Oct 06 2012 Karol Trzcionka - 3.01-1 - Update to v3.01 - Add manual pages From cbfbfda78649f7d945803568ad55d45216517cf4 Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Sat, 27 Apr 2013 21:05:06 +0200 Subject: [PATCH 13/24] Update to v3.02.02 --- .gitignore | 2 ++ sources | 4 ++-- tesseract-3.00-gcc47.patch | 11 ---------- tesseract-pkgconfig.patch | 14 +++++++++++++ tesseract.spec | 41 ++++++++++++++++++++++---------------- 5 files changed, 42 insertions(+), 30 deletions(-) delete mode 100644 tesseract-3.00-gcc47.patch create mode 100644 tesseract-pkgconfig.patch diff --git a/.gitignore b/.gitignore index 52e60b0..6115873 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ tesseract-2.00.eng.tar.gz /eng.traineddata.gz /tesseract-3.01.tar.gz /tesseract-ocr-3.01.eng.tar.gz +/tesseract-ocr-3.02.02.tar.gz +/tesseract-ocr-3.02.eng.tar.gz diff --git a/sources b/sources index e5d8488..905fcff 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -1ba496e51a42358fb9d3ffe781b2d20a tesseract-3.01.tar.gz -89c139a73e0e7b1225809fc7b226b6c9 tesseract-ocr-3.01.eng.tar.gz +26adc8154f0e815053816825dde246e6 tesseract-ocr-3.02.02.tar.gz +3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz diff --git a/tesseract-3.00-gcc47.patch b/tesseract-3.00-gcc47.patch deleted file mode 100644 index 22e6c6d..0000000 --- a/tesseract-3.00-gcc47.patch +++ /dev/null @@ -1,11 +0,0 @@ -diff -ur tesseract-3.00/viewer/svutil.cpp tesseract-3.00-gcc47/viewer/svutil.cpp ---- tesseract-3.00/viewer/svutil.cpp 2010-09-30 17:22:07.000000000 +0200 -+++ tesseract-3.00-gcc47/viewer/svutil.cpp 2012-07-31 23:30:16.000000000 +0200 -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - #include - #include - #ifdef __linux__ diff --git a/tesseract-pkgconfig.patch b/tesseract-pkgconfig.patch new file mode 100644 index 0000000..35a32f2 --- /dev/null +++ b/tesseract-pkgconfig.patch @@ -0,0 +1,14 @@ +diff -rupN 
tesseract-ocr/tesseract.pc.in tesseract-ocr-new/tesseract.pc.in +--- tesseract-ocr/tesseract.pc.in 2012-09-29 13:44:44.000000000 +0200 ++++ tesseract-ocr-new/tesseract.pc.in 2013-01-02 01:19:23.000000000 +0100 +@@ -10,6 +10,7 @@ Name: @PACKAGE_NAME@ + Description: An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. + URL: https://code.google.com/p/tesseract-ocr + Version: @VERSION@ +-# Requires: lept ## leptonica do not provide lept.pc +-Libs: -L${libdir} -ltesseract @LDFLAGS@ @LIBS@ +-Cflags: -I${includedir} @CFLAGS@ @CPPFLAGS@ @CXXFLAGS@ @DEFS@ ++Requires.private: lept ++Libs: -L${libdir} -ltesseract ++Libs.private: -lpthread ++Cflags: -I${includedir} diff --git a/tesseract.spec b/tesseract.spec index 51098ba..d720e21 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,22 +1,23 @@ +%global fullname tesseract-ocr Name: tesseract -Version: 3.01 -Release: 2%{?dist} +Version: 3.02.02 +Release: 1%{?dist} Summary: Raw OCR Engine Group: Applications/File License: ASL 2.0 -URL: http://code.google.com/p/tesseract-ocr/ -Source0: http://tesseract-ocr.googlecode.com/files/%{name}-%{version}.tar.gz -Source1: http://tesseract-ocr.googlecode.com/files/%{name}-ocr-3.01.eng.tar.gz -# fix FTBFS with g++ 4.7 -Patch0: tesseract-3.00-gcc47.patch +URL: http://code.google.com/p/%{fullname}/ +Source0: http://tesseract-ocr.googlecode.com/files/%{fullname}-%{version}.tar.gz +Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz +Patch0: %{name}-pkgconfig.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtiff-devel BuildRequires: leptonica-devel BuildRequires: automake,libtool +Obsoletes: tesseract < 3.02.02 %package devel -Summary: Development files for %{name} +Summary: Development files for %{fullname} Group: Development/Libraries Requires: %{name} = %{version}-%{release} @@ -30,13 +31,12 @@ The %{name}-devel package contains header file for developing applications that use 
%{name}. %prep -%setup -q -gzip -dc %{SOURCE1} | tar -xf - -%patch0 -p1 -b .gcc47 +%setup -q -n %{fullname} +%setup -q -a 1 -n %{fullname} +%patch0 -p1 %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* -./autogen.sh %configure --disable-static sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool @@ -46,9 +46,9 @@ make %{?_smp_mflags} rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT rm -f $RPM_BUILD_ROOT%{_libdir}/*la -mkdir -p $RPM_BUILD_ROOT%{_datadir}/tesseract +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/%{name} -install -m 0644 %{name}-ocr/tessdata/* $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata +install -m 0644 %{fullname}/tessdata/* $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata %clean rm -rf $RPM_BUILD_ROOT @@ -57,10 +57,13 @@ rm -rf $RPM_BUILD_ROOT %postun -p /sbin/ldconfig %files -%defattr(-,root,root,-) -%{_bindir}/%{name} +%{_bindir}/ambiguous_words +%{_bindir}/classifier_tester %{_bindir}/combine_tessdata +%{_bindir}/dawg2wordlist +%{_bindir}/shapeclustering %{_bindir}/*training +%{_bindir}/%{name} %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg %{_datadir}/%{name} @@ -71,11 +74,15 @@ rm -rf $RPM_BUILD_ROOT %doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README %files devel -%defattr(-,root,root,-) %{_includedir}/%{name} %{_libdir}/lib%{name}*.so +%{_libdir}/pkgconfig/%{name}.pc %changelog +* Sat Apr 27 2013 Karol Trzcionka - 3.02.02-1 +- Update to v3.02.02 +- Apply pkgconfig patch rhbz#904806 + * Fri Feb 15 2013 Fedora Release Engineering - 3.01-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild From a0c433bed82ec065cdeb1baf7bb66c9b17691bbf Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Sat, 3 Aug 2013 22:44:14 -0500 Subject: [PATCH 14/24] - Rebuilt for 
https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index d720e21..177b004 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,7 +1,7 @@ %global fullname tesseract-ocr Name: tesseract Version: 3.02.02 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -79,6 +79,9 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/pkgconfig/%{name}.pc %changelog +* Sun Aug 04 2013 Fedora Release Engineering - 3.02.02-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + * Sat Apr 27 2013 Karol Trzcionka - 3.02.02-1 - Update to v3.02.02 - Apply pkgconfig patch rhbz#904806 From f07f835bfcf28ecd4bd429d59f1d58b01a6cbd69 Mon Sep 17 00:00:00 2001 From: Karol Trzcionka Date: Thu, 27 Mar 2014 19:45:03 +0100 Subject: [PATCH 15/24] Fix rhbz#1037350 (-Werror=format-security) Add OSD data Remove BuildRoot tag --- .gitignore | 1 + sources | 1 + tesseract-3.02.02-format.patch | 12 ++++++++++++ tesseract.spec | 31 +++++++++++++++++++++++++++---- 4 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 tesseract-3.02.02-format.patch diff --git a/.gitignore b/.gitignore index 6115873..31a4eb6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ tesseract-2.00.eng.tar.gz /tesseract-ocr-3.01.eng.tar.gz /tesseract-ocr-3.02.02.tar.gz /tesseract-ocr-3.02.eng.tar.gz +/tesseract-ocr-3.01.osd.tar.gz diff --git a/sources b/sources index 905fcff..22c49be 100644 --- a/sources +++ b/sources @@ -1,2 +1,3 @@ 26adc8154f0e815053816825dde246e6 tesseract-ocr-3.02.02.tar.gz 3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz +683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz diff --git a/tesseract-3.02.02-format.patch b/tesseract-3.02.02-format.patch new file mode 100644 index 0000000..45a4136 --- /dev/null +++ b/tesseract-3.02.02-format.patch @@ -0,0 +1,12 @@ +diff -up ./dict/permdawg.cpp.format 
./dict/permdawg.cpp +--- ./dict/permdawg.cpp.format 2012-09-02 22:08:43.000000000 +0200 ++++ ./dict/permdawg.cpp 2014-03-27 18:38:40.026525432 +0100 +@@ -205,7 +205,7 @@ void Dict::go_deeper_dawg_fxn( + STRING word_str; + word->string_and_lengths(&word_str, NULL); + word_str += " "; +- fprintf(output_ambig_words_file_, word_str.string()); ++ fprintf(output_ambig_words_file_, "%s", word_str.string()); + } + WERD_CHOICE *adjusted_word = word; + WERD_CHOICE hyphen_tail_word(&getUnicharset()); diff --git a/tesseract.spec b/tesseract.spec index 177b004..bb5812e 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,7 +1,7 @@ %global fullname tesseract-ocr Name: tesseract Version: 3.02.02 -Release: 2%{?dist} +Release: 3%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -9,8 +9,9 @@ License: ASL 2.0 URL: http://code.google.com/p/%{fullname}/ Source0: http://tesseract-ocr.googlecode.com/files/%{fullname}-%{version}.tar.gz Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz +Source2: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.01.osd.tar.gz Patch0: %{name}-pkgconfig.patch -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +Patch1: %{name}-%{version}-format.patch BuildRequires: libtiff-devel BuildRequires: leptonica-devel BuildRequires: automake,libtool @@ -21,6 +22,11 @@ Summary: Development files for %{fullname} Group: Development/Libraries Requires: %{name} = %{version}-%{release} +%package osd +Summary: Orientation & Script Detection Data for %{fullname} +Group: Applications/File +Requires: %{name} = %{version}-%{release} + %description A commercial quality OCR engine originally developed at HP between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was @@ -30,10 +36,15 @@ open-sourced by HP and UNLV in 2005. The %{name}-devel package contains header file for developing applications that use %{name}. 
+%description osd +Orientation & Script Detection Data for %{fullname} + %prep %setup -q -n %{fullname} -%setup -q -a 1 -n %{fullname} +%setup -q -a 1 -D -n %{fullname} +%setup -q -a 2 -D -n %{fullname} %patch0 -p1 +%patch1 -p1 %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* @@ -66,7 +77,11 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/%{name} %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg -%{_datadir}/%{name} +%dir %{_datadir}/%{name} +%dir %{_datadir}/%{name}/tessdata +%{_datadir}/%{name}/tessdata/configs +%{_datadir}/%{name}/tessdata/tessconfigs +%{_datadir}/%{name}/tessdata/eng.* %{_libdir}/lib%{name}*.so.* %{_mandir}/man1/* %{_mandir}/man5/* @@ -78,7 +93,15 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib%{name}*.so %{_libdir}/pkgconfig/%{name}.pc +%files osd +%{_datadir}/%{name}/tessdata/osd.traineddata + %changelog +* Thu Mar 27 2014 Karol Trzcionka - 3.02.02-3 +- Fix rhbz#1037350 (-Werror=format-security) +- Add OSD data +- Remove BuildRoot tag + * Sun Aug 04 2013 Fedora Release Engineering - 3.02.02-2 - Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild From a9bb7768a7600b48aab7ca849ddad93a240ee3a1 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Sun, 8 Jun 2014 03:22:53 -0500 Subject: [PATCH 16/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index bb5812e..a6223d5 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,7 +1,7 @@ %global fullname tesseract-ocr Name: tesseract Version: 3.02.02 -Release: 3%{?dist} +Release: 4%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -97,6 +97,9 @@ rm -rf $RPM_BUILD_ROOT %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Sun Jun 08 2014 Fedora Release Engineering - 3.02.02-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + * Thu Mar 27 2014 Karol Trzcionka - 3.02.02-3 - 
Fix rhbz#1037350 (-Werror=format-security) - Add OSD data From 512a9a149779e9856a7d6a861f923f2d5411fdc1 Mon Sep 17 00:00:00 2001 From: Sandro Mani Date: Tue, 12 Aug 2014 23:37:16 +0200 Subject: [PATCH 17/24] Update to 3.03.rc1 --- .gitignore | 1 + sources | 4 +-- tesseract-3.02.02-format.patch | 12 -------- tesseract-pkgconfig.patch | 14 --------- tesseract.spec | 56 +++++++++++++++++++--------------- 5 files changed, 34 insertions(+), 53 deletions(-) delete mode 100644 tesseract-3.02.02-format.patch delete mode 100644 tesseract-pkgconfig.patch diff --git a/.gitignore b/.gitignore index 31a4eb6..cd2a650 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ tesseract-2.00.eng.tar.gz /tesseract-ocr-3.02.02.tar.gz /tesseract-ocr-3.02.eng.tar.gz /tesseract-ocr-3.01.osd.tar.gz +/tesseract-3.03-rc1.tar.gz diff --git a/sources b/sources index 22c49be..c61fbc7 100644 --- a/sources +++ b/sources @@ -1,3 +1 @@ -26adc8154f0e815053816825dde246e6 tesseract-ocr-3.02.02.tar.gz -3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz -683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz +d69ceca9ae70e0b7020d0f92d60b8565 tesseract-3.03-rc1.tar.gz diff --git a/tesseract-3.02.02-format.patch b/tesseract-3.02.02-format.patch deleted file mode 100644 index 45a4136..0000000 --- a/tesseract-3.02.02-format.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up ./dict/permdawg.cpp.format ./dict/permdawg.cpp ---- ./dict/permdawg.cpp.format 2012-09-02 22:08:43.000000000 +0200 -+++ ./dict/permdawg.cpp 2014-03-27 18:38:40.026525432 +0100 -@@ -205,7 +205,7 @@ void Dict::go_deeper_dawg_fxn( - STRING word_str; - word->string_and_lengths(&word_str, NULL); - word_str += " "; -- fprintf(output_ambig_words_file_, word_str.string()); -+ fprintf(output_ambig_words_file_, "%s", word_str.string()); - } - WERD_CHOICE *adjusted_word = word; - WERD_CHOICE hyphen_tail_word(&getUnicharset()); diff --git a/tesseract-pkgconfig.patch b/tesseract-pkgconfig.patch deleted file mode 100644 index 
35a32f2..0000000 --- a/tesseract-pkgconfig.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff -rupN tesseract-ocr/tesseract.pc.in tesseract-ocr-new/tesseract.pc.in ---- tesseract-ocr/tesseract.pc.in 2012-09-29 13:44:44.000000000 +0200 -+++ tesseract-ocr-new/tesseract.pc.in 2013-01-02 01:19:23.000000000 +0100 -@@ -10,6 +10,7 @@ Name: @PACKAGE_NAME@ - Description: An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. - URL: https://code.google.com/p/tesseract-ocr - Version: @VERSION@ --# Requires: lept ## leptonica do not provide lept.pc --Libs: -L${libdir} -ltesseract @LDFLAGS@ @LIBS@ --Cflags: -I${includedir} @CFLAGS@ @CPPFLAGS@ @CXXFLAGS@ @DEFS@ -+Requires.private: lept -+Libs: -L${libdir} -ltesseract -+Libs.private: -lpthread -+Cflags: -I${includedir} diff --git a/tesseract.spec b/tesseract.spec index a6223d5..c4e4dec 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,20 +1,26 @@ %global fullname tesseract-ocr +%global pre rc1 + Name: tesseract -Version: 3.02.02 -Release: 4%{?dist} -Summary: Raw OCR Engine +Version: 3.03 +Release: 0.1%{?pre:.%pre}%{?dist} +Summary: Raw OCR Engine Group: Applications/File License: ASL 2.0 URL: http://code.google.com/p/%{fullname}/ -Source0: http://tesseract-ocr.googlecode.com/files/%{fullname}-%{version}.tar.gz +# The downloads are now posted on google-drive which has impossible download URLS... 
+# The url of the drive is +# https://drive.google.com/folderview?id=0B7l10Bj_LprhQnpSRkpGMGV2eE0 +Source0: %{name}-%{version}%{?pre:-%pre}.tar.gz Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz Source2: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.01.osd.tar.gz -Patch0: %{name}-pkgconfig.patch -Patch1: %{name}-%{version}-format.patch BuildRequires: libtiff-devel BuildRequires: leptonica-devel -BuildRequires: automake,libtool +BuildRequires: cairo-devel +BuildRequires: libicu-devel +BuildRequires: pango-devel +BuildRequires: automake libtool Obsoletes: tesseract < 3.02.02 %package devel @@ -40,29 +46,24 @@ developing applications that use %{name}. Orientation & Script Detection Data for %{fullname} %prep -%setup -q -n %{fullname} -%setup -q -a 1 -D -n %{fullname} -%setup -q -a 2 -D -n %{fullname} -%patch0 -p1 -%patch1 -p1 +%setup -q -n %{name}-%{version} -a1 -a2 %build sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* +autoreconf -ifv %configure --disable-static -sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool -sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool make %{?_smp_mflags} +# Remove compiled files, see https://groups.google.com/forum/#!topic/tesseract-dev/ARKOSV3zpWo +make -C training clean +make %{?_smp_mflags} training %install -rm -rf $RPM_BUILD_ROOT -make install DESTDIR=$RPM_BUILD_ROOT -rm -f $RPM_BUILD_ROOT%{_libdir}/*la -mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} -mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/%{name} -install -m 0644 %{fullname}/tessdata/* $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata - -%clean -rm -rf $RPM_BUILD_ROOT +%make_install +%make_install training-install +rm -f %{buildroot}%{_libdir}/*la +mkdir -p %{buildroot}%{_datadir}/%{name} +mv %{buildroot}%{_datadir}/tessdata %{buildroot}%{_datadir}/%{name} +install -m 0644 %{fullname}/tessdata/* 
%{buildroot}%{_datadir}/%{name}/tessdata %post -p /sbin/ldconfig %postun -p /sbin/ldconfig @@ -72,9 +73,11 @@ rm -rf $RPM_BUILD_ROOT %{_bindir}/classifier_tester %{_bindir}/combine_tessdata %{_bindir}/dawg2wordlist +%{_bindir}/set_unicharset_properties %{_bindir}/shapeclustering %{_bindir}/*training %{_bindir}/%{name} +%{_bindir}/text2image %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg %dir %{_datadir}/%{name} @@ -82,6 +85,8 @@ rm -rf $RPM_BUILD_ROOT %{_datadir}/%{name}/tessdata/configs %{_datadir}/%{name}/tessdata/tessconfigs %{_datadir}/%{name}/tessdata/eng.* +%{_datadir}/%{name}/tessdata/pdf.ttf +%{_datadir}/%{name}/tessdata/pdf.ttx %{_libdir}/lib%{name}*.so.* %{_mandir}/man1/* %{_mandir}/man5/* @@ -97,6 +102,9 @@ rm -rf $RPM_BUILD_ROOT %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Tue Aug 12 2014 Sandro Mani - 3.03-0.1.rc1 +- Update to v3.03-rc1 + * Sun Jun 08 2014 Fedora Release Engineering - 3.02.02-4 - Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild @@ -170,7 +178,7 @@ rm -rf $RPM_BUILD_ROOT - Including patch bases on cvs * Tue Feb 13 2007 Karol Trzcionka - 1.03-1 - Update to v1.03 -* Sat Jan 26 2007 Karol Trzcionka - 1.02-3 +* Sat Jan 27 2007 Karol Trzcionka - 1.02-3 - Update BRs - Fix x86_64 compile * Sat Dec 30 2006 Karol Trzcionka - 1.02-2 From 0f45b89fd0f9d328755bf48bbcd91aac21b3b6cc Mon Sep 17 00:00:00 2001 From: Sandro Mani Date: Tue, 12 Aug 2014 23:58:42 +0200 Subject: [PATCH 18/24] Correctly update sources --- sources | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sources b/sources index c61fbc7..660475e 100644 --- a/sources +++ b/sources @@ -1 +1,3 @@ d69ceca9ae70e0b7020d0f92d60b8565 tesseract-3.03-rc1.tar.gz +683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz +3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz From 4237fa47d94a339d3ccfae7cde54a9ebb2524df8 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 18 Aug 2014 05:23:51 +0000 Subject: [PATCH 19/24] - Rebuilt for 
https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index c4e4dec..f38ed75 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -3,7 +3,7 @@ Name: tesseract Version: 3.03 -Release: 0.1%{?pre:.%pre}%{?dist} +Release: 0.2%{?pre:.%pre}%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -102,6 +102,9 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Mon Aug 18 2014 Fedora Release Engineering - 3.03-0.2.rc1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + * Tue Aug 12 2014 Sandro Mani - 3.03-0.1.rc1 - Update to v3.03-rc1 From 31c777336b414090778df2dd109291ac4dd5e9ce Mon Sep 17 00:00:00 2001 From: David Tardon Date: Tue, 26 Aug 2014 13:57:33 +0200 Subject: [PATCH 20/24] rebuild for ICU 53.1 --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index f38ed75..74c5a0e 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -3,7 +3,7 @@ Name: tesseract Version: 3.03 -Release: 0.2%{?pre:.%pre}%{?dist} +Release: 0.3%{?pre:.%pre}%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -102,6 +102,9 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Tue Aug 26 2014 David Tardon - 3.03-0.3.rc1 +- rebuild for ICU 53.1 + * Mon Aug 18 2014 Fedora Release Engineering - 3.03-0.2.rc1 - Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild From c5d0dbb46886341b4f9f25c747fbea86ca29d2dc Mon Sep 17 00:00:00 2001 From: David Tardon Date: Mon, 26 Jan 2015 17:08:45 +0100 Subject: [PATCH 21/24] rebuild for ICU 54.1 --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index 74c5a0e..cfc9a94 100644 --- 
a/tesseract.spec +++ b/tesseract.spec @@ -3,7 +3,7 @@ Name: tesseract Version: 3.03 -Release: 0.3%{?pre:.%pre}%{?dist} +Release: 0.4%{?pre:.%pre}%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -102,6 +102,9 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Mon Jan 26 2015 David Tardon - 3.03-0.4.rc1 +- rebuild for ICU 54.1 + * Tue Aug 26 2014 David Tardon - 3.03-0.3.rc1 - rebuild for ICU 53.1 From a5c35a632156750685f9b11ba432f9d3635baaa0 Mon Sep 17 00:00:00 2001 From: Kalev Lember Date: Sat, 2 May 2015 18:10:29 +0200 Subject: [PATCH 22/24] Rebuilt for GCC 5 C++11 ABI change --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index cfc9a94..081c2ea 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -3,7 +3,7 @@ Name: tesseract Version: 3.03 -Release: 0.4%{?pre:.%pre}%{?dist} +Release: 0.5%{?pre:.%pre}%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -102,6 +102,9 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Sat May 02 2015 Kalev Lember - 3.03-0.5.rc1 +- Rebuilt for GCC 5 C++11 ABI change + * Mon Jan 26 2015 David Tardon - 3.03-0.4.rc1 - rebuild for ICU 54.1 From f03fe441cb382fb23e814ccad08ea8f570ee2e62 Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Fri, 19 Jun 2015 02:44:39 +0000 Subject: [PATCH 23/24] - Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild --- tesseract.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tesseract.spec b/tesseract.spec index 081c2ea..3162e5d 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -3,7 +3,7 @@ Name: tesseract Version: 3.03 -Release: 0.5%{?pre:.%pre}%{?dist} +Release: 0.6%{?pre:.%pre}%{?dist} Summary: Raw OCR Engine Group: Applications/File @@ -102,6 +102,9 @@ install -m 0644 %{fullname}/tessdata/* 
%{buildroot}%{_datadir}/%{name}/tessdata %{_datadir}/%{name}/tessdata/osd.traineddata %changelog +* Fri Jun 19 2015 Fedora Release Engineering - 3.03-0.6.rc1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + * Sat May 02 2015 Kalev Lember - 3.03-0.5.rc1 - Rebuilt for GCC 5 C++11 ABI change From e63c31a8f21596d398be1d011b137ed32944ea83 Mon Sep 17 00:00:00 2001 From: Sandro Mani Date: Tue, 6 Oct 2015 11:30:59 +0200 Subject: [PATCH 24/24] Update to version 3.04.00 --- .gitignore | 2 + sources | 5 +- tesseract.spec | 213 +++++++++++++++++++++++++++++++--------- tesseract_datadir.patch | 37 +++++++ 4 files changed, 209 insertions(+), 48 deletions(-) create mode 100644 tesseract_datadir.patch diff --git a/.gitignore b/.gitignore index cd2a650..b5f2981 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ tesseract-2.00.eng.tar.gz /tesseract-ocr-3.02.eng.tar.gz /tesseract-ocr-3.01.osd.tar.gz /tesseract-3.03-rc1.tar.gz +/tesseract-3.04.00.tar.gz +/tessdata-3.04.00.tar.gz diff --git a/sources b/sources index 660475e..b209211 100644 --- a/sources +++ b/sources @@ -1,3 +1,2 @@ -d69ceca9ae70e0b7020d0f92d60b8565 tesseract-3.03-rc1.tar.gz -683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz -3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz +078130b9c7d28c558a0e49d432505864 tesseract-3.04.00.tar.gz +b25e830d203af5c863081af3f684b53a tessdata-3.04.00.tar.gz diff --git a/tesseract.spec b/tesseract.spec index 3162e5d..284a8ba 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,74 +1,196 @@ -%global fullname tesseract-ocr -%global pre rc1 +Name: tesseract +Version: 3.04.00 +Release: 1%{?dist} +Summary: Raw OCR Engine -Name: tesseract -Version: 3.03 -Release: 0.6%{?pre:.%pre}%{?dist} -Summary: Raw OCR Engine +License: ASL 2.0 +URL: https://github.com/tesseract-ocr/%{name} +Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz +Source1: 
https://github.com/tesseract-ocr/tessdata/archive/%{version}.tar.gz#/tessdata-%{version}.tar.gz -Group: Applications/File -License: ASL 2.0 -URL: http://code.google.com/p/%{fullname}/ -# The downloads are now posted on google-drive which has impossible download URLS... -# The url of the drive is -# https://drive.google.com/folderview?id=0B7l10Bj_LprhQnpSRkpGMGV2eE0 -Source0: %{name}-%{version}%{?pre:-%pre}.tar.gz -Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz -Source2: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.01.osd.tar.gz -BuildRequires: libtiff-devel -BuildRequires: leptonica-devel -BuildRequires: cairo-devel -BuildRequires: libicu-devel -BuildRequires: pango-devel -BuildRequires: automake libtool -Obsoletes: tesseract < 3.02.02 +# Tweak location of tessdata folder +Patch0: tesseract_datadir.patch -%package devel -Summary: Development files for %{fullname} -Group: Development/Libraries -Requires: %{name} = %{version}-%{release} +BuildRequires: libtiff-devel +BuildRequires: leptonica-devel +BuildRequires: cairo-devel +BuildRequires: libicu-devel +BuildRequires: pango-devel +BuildRequires: automake libtool -%package osd -Summary: Orientation & Script Detection Data for %{fullname} -Group: Applications/File -Requires: %{name} = %{version}-%{release} %description A commercial quality OCR engine originally developed at HP between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in 2005. + +%package devel +Summary: Development files for %{name} +Requires: %{name}%{?_isa} = %{version}-%{release} + %description devel The %{name}-devel package contains header file for developing applications that use %{name}. 
+ +%package osd +Summary: Orientation & Script Detection Data for %{name} +Requires: %{name}%{?_isa} = %{version}-%{release} + %description osd -Orientation & Script Detection Data for %{fullname} +Orientation & Script Detection Data for %{name} + +%define lang_subpkg() \ +%package langpack-%{1}\ +Summary: %{2} language data for %{name}\ +BuildArch: noarch\ +Requires: %{name} = %{version}-%{release}\ +\ +%description langpack-%{1}\ +%{2} language data for %{name}.\ +\ +%files langpack-%{1}\ +%{_datadir}/%{name}/tessdata/%{1}.* + +# see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes +# and https://en.wikipedia.org/wiki/ISO_639_macrolanguage +%lang_subpkg afr Afrikaans +%lang_subpkg amh Amharic +%lang_subpkg ara Arabic +%lang_subpkg asm Assamese +%lang_subpkg aze Azerbaijani +%lang_subpkg aze_cyrl "Azerbaijani (Cyrillic)" +%lang_subpkg bel Belarusian +%lang_subpkg ben Bengali +%lang_subpkg bod "Tibetan (Standard)" +%lang_subpkg bos Bosnian +%lang_subpkg bul Bulgarian +%lang_subpkg cat Catalan +%lang_subpkg ceb Cebuano +%lang_subpkg ces Czech +%lang_subpkg chi_sim "Chinese (Simplified)" +%lang_subpkg chi_tra "Chinese (Traditional)" +%lang_subpkg chr Cherokee +%lang_subpkg cym Welsh +%lang_subpkg dan Danish +%lang_subpkg dan_frak "Danish (Fraktur)" +%lang_subpkg deu German +%lang_subpkg deu_frak "German (Fraktur)" +%lang_subpkg dzo Dzongkha +%lang_subpkg ell Greek +%lang_subpkg enm "Middle English (1100-1500)" +%lang_subpkg epo Esperanto +%lang_subpkg equ "Math / equation" +%lang_subpkg est Estonian +%lang_subpkg eus Basque +%lang_subpkg fas "Persian (Farsi)" +%lang_subpkg fin Finnish +%lang_subpkg fra French +%lang_subpkg frk Frankish +%lang_subpkg frm "Middle French (ca.
1400-1600)" +%lang_subpkg gle Irish +%lang_subpkg glg Galician +%lang_subpkg grc "Ancient Greek" +%lang_subpkg guj Gujarati +%lang_subpkg hat Haitian +%lang_subpkg heb Hebrew +%lang_subpkg hin Hindi +%lang_subpkg hrv Croatian +%lang_subpkg hun Hungarian +%lang_subpkg iku Inuktitut +%lang_subpkg ind Indonesian +%lang_subpkg isl Icelandic +%lang_subpkg ita Italian +%lang_subpkg ita_old "Italian (Old)" +%lang_subpkg jav Javanese +%lang_subpkg jpn Japanese +%lang_subpkg kan Kannada +%lang_subpkg kat Georgian +%lang_subpkg kat_old "Georgian (Old)" +%lang_subpkg kaz Kazakh +%lang_subpkg khm Khmer +%lang_subpkg kir Kyrgyz +%lang_subpkg kor Korean +%lang_subpkg kur Kurdish +%lang_subpkg lao Lao +%lang_subpkg lat Latin +%lang_subpkg lav Latvian +%lang_subpkg lit Lithuanian +%lang_subpkg mal Malayalam +%lang_subpkg mar Marathi +%lang_subpkg mkd Macedonian +%lang_subpkg mlt Maltese +%lang_subpkg msa Malay +%lang_subpkg mya Burmese +%lang_subpkg nep Nepali +%lang_subpkg nld Dutch +%lang_subpkg nor Norwegian +%lang_subpkg ori Oriya +%lang_subpkg pan Panjabi +%lang_subpkg pol Polish +%lang_subpkg por Portuguese +%lang_subpkg pus Pashto +%lang_subpkg ron Romanian +%lang_subpkg rus Russian +%lang_subpkg san Sanskrit +%lang_subpkg sin Sinhala +%lang_subpkg slk Slovakian +%lang_subpkg slk_frak "Slovakian (Fraktur)" +%lang_subpkg slv Slovenian +%lang_subpkg spa Spanish +%lang_subpkg spa_old "Spanish (Old)" +%lang_subpkg sqi Albanian +%lang_subpkg srp Serbian +%lang_subpkg srp_latn "Serbian (Latin)" +%lang_subpkg swa Swahili +%lang_subpkg swe Swedish +%lang_subpkg syr Syriac +%lang_subpkg tam Tamil +%lang_subpkg tel Telugu +%lang_subpkg tgk Tajik +%lang_subpkg tgl Tagalog +%lang_subpkg tha Thai +%lang_subpkg tir Tigrinya +%lang_subpkg tur Turkish +%lang_subpkg uig Uyghur +%lang_subpkg ukr Ukrainian +%lang_subpkg urd Urdu +%lang_subpkg uzb Uzbek +%lang_subpkg uzb_cyrl "Uzbek (Cyrillic)" +%lang_subpkg vie Vietnamese +%lang_subpkg yid Yiddish + %prep -%setup -q -n %{name}-%{version} -a1 
-a2 +%setup -q -n %{name}-%{version} -a1 +%patch0 -p1 + %build -sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* autoreconf -ifv %configure --disable-static + make %{?_smp_mflags} -# Remove compiled files, see https://groups.google.com/forum/#!topic/tesseract-dev/ARKOSV3zpWo -make -C training clean make %{?_smp_mflags} training + %install %make_install %make_install training-install -rm -f %{buildroot}%{_libdir}/*la -mkdir -p %{buildroot}%{_datadir}/%{name} -mv %{buildroot}%{_datadir}/tessdata %{buildroot}%{_datadir}/%{name} -install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata + +find %{buildroot}%{_libdir} -type f -name '*.la' -delete + +install -pm 0644 tessdata-%{version}/* %{buildroot}/%{_datadir}/%{name}/tessdata/ + %post -p /sbin/ldconfig %postun -p /sbin/ldconfig + %files +%license COPYING +%doc AUTHORS ChangeLog NEWS README testing/eurotext.tif testing/phototest.tif %{_bindir}/ambiguous_words %{_bindir}/classifier_tester %{_bindir}/combine_tessdata @@ -82,17 +204,14 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %{_bindir}/wordlist2dawg %dir %{_datadir}/%{name} %dir %{_datadir}/%{name}/tessdata -%{_datadir}/%{name}/tessdata/configs -%{_datadir}/%{name}/tessdata/tessconfigs +%{_datadir}/%{name}/tessdata/configs/ +%{_datadir}/%{name}/tessdata/tessconfigs/ %{_datadir}/%{name}/tessdata/eng.* %{_datadir}/%{name}/tessdata/pdf.ttf -%{_datadir}/%{name}/tessdata/pdf.ttx -%{_libdir}/lib%{name}*.so.* +%{_libdir}/lib%{name}*.so.3* %{_mandir}/man1/* %{_mandir}/man5/* -%doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README - %files devel %{_includedir}/%{name} %{_libdir}/lib%{name}*.so @@ -101,7 +220,11 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata %files osd %{_datadir}/%{name}/tessdata/osd.traineddata + %changelog +* Sat Sep 12 2015 Sandro Mani - 3.04.00-1 +- Update to 3.04.00 + * Fri Jun 19 2015 Fedora 
Release Engineering - 3.03-0.6.rc1 - Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild diff --git a/tesseract_datadir.patch b/tesseract_datadir.patch new file mode 100644 index 0000000..49f5509 --- /dev/null +++ b/tesseract_datadir.patch @@ -0,0 +1,37 @@ +diff -rupN tesseract-3.04.00/ccutil/Makefile.am tesseract-3.04.00-new/ccutil/Makefile.am +--- tesseract-3.04.00/ccutil/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/ccutil/Makefile.am 2015-09-12 19:10:31.983919381 +0200 +@@ -3,7 +3,7 @@ SUBDIRS = + AM_CXXFLAGS = + + if !NO_TESSDATA_PREFIX +-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/ ++AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/ + endif + + if VISIBILITY +diff -rupN tesseract-3.04.00/tessdata/configs/Makefile.am tesseract-3.04.00-new/tessdata/configs/Makefile.am +--- tesseract-3.04.00/tessdata/configs/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/configs/Makefile.am 2015-09-12 19:10:40.978587765 +0200 +@@ -1,3 +1,3 @@ +-datadir = @datadir@/tessdata/configs ++datadir = @datadir@/tesseract/tessdata/configs + data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram + EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram +diff -rupN tesseract-3.04.00/tessdata/Makefile.am tesseract-3.04.00-new/tessdata/Makefile.am +--- tesseract-3.04.00/tessdata/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/Makefile.am 2015-09-12 19:10:31.985919304 +0200 +@@ -1,4 +1,4 @@ +-datadir = @datadir@/tessdata ++datadir = @datadir@/tesseract/tessdata + + data_DATA = pdf.ttf + EXTRA_DIST = $(data_DATA) +diff -rupN tesseract-3.04.00/tessdata/tessconfigs/Makefile.am tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am +--- tesseract-3.04.00/tessdata/tessconfigs/Makefile.am 
2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am 2015-09-12 19:10:48.218340816 +0200 +@@ -1,3 +1,3 @@ +-datadir = @datadir@/tessdata/tessconfigs ++datadir = @datadir@/tesseract/tessdata/tessconfigs + data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo + EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo