diff --git a/sources b/sources index f738c11..b209211 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -b44eba1a9f4892ac62e484c807fe0533 tesseract-2.04.tar.gz -b8291d6b3a63ce7879d688e845e341a9 tesseract-2.00.eng.tar.gz +078130b9c7d28c558a0e49d432505864 tesseract-3.04.00.tar.gz +b25e830d203af5c863081af3f684b53a tessdata-3.04.00.tar.gz diff --git a/tesseract.spec b/tesseract.spec index de59d6d..284a8ba 100644 --- a/tesseract.spec +++ b/tesseract.spec @@ -1,67 +1,290 @@ -Name: tesseract -Version: 2.04 -Release: 1%{?dist} -Summary: Raw OCR Engine +Name: tesseract +Version: 3.04.00 +Release: 1%{?dist} +Summary: Raw OCR Engine -Group: Applications/File -License: ASL 2.0 -URL: http://code.google.com/p/tesseract-ocr/ -Source0: http://tesseract-ocr.googlecode.com/files/%{name}-%{version}.tar.gz -Source1: http://tesseract-ocr.googlecode.com/files/%{name}-2.00.eng.tar.gz -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -BuildRequires: libtiff-devel +License: ASL 2.0 +URL: https://github.com/tesseract-ocr/%{name} +Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz +Source1: https://github.com/tesseract-ocr/tessdata/archive/%{version}.tar.gz#/tessdata-%{version}.tar.gz + +# Tweak location of tessdata folder +Patch0: tesseract_datadir.patch + +BuildRequires: libtiff-devel +BuildRequires: leptonica-devel +BuildRequires: cairo-devel +BuildRequires: libicu-devel +BuildRequires: pango-devel +BuildRequires: automake libtool -%package devel -Summary: Development files for %{name} -Group: Development/Libraries -Requires: %{name} = %{version}-%{release} -Provides: %{name}-static = %{version}-%{release} %description A commercial quality OCR engine originally developed at HP between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in 2005. 
+ +%package devel +Summary: Development files for %{name} +Requires: %{name}%{?_isa} = %{version}-%{release} + %description devel The %{name}-devel package contains header file for developing applications that use %{name}. + +%package osd +Summary: Orientation & Script Detection Data for %{name} +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description osd +Orientation & Script Detection Data for %{name} + +%define lang_subpkg() \ +%package langpack-%{1}\ +Summary: %{2} language data for %{name}\ +BuildArch: noarch\ +Requires: %{name} = %{version}-%{release}\ +\ +%description langpack-%{1}\ +%{2} language data for %{name}.\ +\ +%files langpack-%{1}\ +%{_datadir}/%{name}/tessdata/%{1}.* + +# see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes +# and https://en.wikipedia.org/wiki/ISO_639_macrolanguage +%lang_subpkg afr Afrikaans +%lang_subpkg amh Amharic +%lang_subpkg ara Arabic +%lang_subpkg asm Assamese +%lang_subpkg aze Azerbaijani +%lang_subpkg aze_cyrl "Azerbaijani (Cyrillic)" +%lang_subpkg bel Belarusian +%lang_subpkg ben Bengali +%lang_subpkg bod "Tibetan (Standard)" +%lang_subpkg bos Bosnian +%lang_subpkg bul Bulgarian +%lang_subpkg cat Catalan +%lang_subpkg ceb Cebuano +%lang_subpkg ces Czech +%lang_subpkg chi_sim "Chinese (Simplified)" +%lang_subpkg chi_tra "Chinese (Traditional)" +%lang_subpkg chr Cherokee +%lang_subpkg cym Welsh +%lang_subpkg dan Danish +%lang_subpkg dan_frak "Danish (Fraktur)" +%lang_subpkg deu German +%lang_subpkg deu_frak "German (Fraktur)" +%lang_subpkg dzo Dzongkha +%lang_subpkg ell Greek +%lang_subpkg enm "Middle English (1100-1500)" +%lang_subpkg epo Esperanto +%lang_subpkg equ "Math / equation" +%lang_subpkg est Estonian +%lang_subpkg eus Basque +%lang_subpkg fas "Persian (Farsi)" +%lang_subpkg fin Finnish +%lang_subpkg fra French +%lang_subpkg frk Frankish +%lang_subpkg frm "Middle French (ca.
1400-1600)" +%lang_subpkg gle Irish +%lang_subpkg glg Galician +%lang_subpkg grc "Ancient Greek" +%lang_subpkg guj Gujarati +%lang_subpkg hat Haitian +%lang_subpkg heb Hebrew +%lang_subpkg hin Hindi +%lang_subpkg hrv Croatian +%lang_subpkg hun Hungarian +%lang_subpkg iku Inuktitut +%lang_subpkg ind Indonesian +%lang_subpkg isl Icelandic +%lang_subpkg ita Italian +%lang_subpkg ita_old "Italian (Old)" +%lang_subpkg jav Javanese +%lang_subpkg jpn Japanese +%lang_subpkg kan Kannada +%lang_subpkg kat Georgian +%lang_subpkg kat_old "Georgian (Old)" +%lang_subpkg kaz Kazakh +%lang_subpkg khm Khmer +%lang_subpkg kir Kyrgyz +%lang_subpkg kor Korean +%lang_subpkg kur Kurdish +%lang_subpkg lao Lao +%lang_subpkg lat Latin +%lang_subpkg lav Latvian +%lang_subpkg lit Lithuanian +%lang_subpkg mal Malayalam +%lang_subpkg mar Marathi +%lang_subpkg mkd Macedonian +%lang_subpkg mlt Maltese +%lang_subpkg msa Malay +%lang_subpkg mya Burmese +%lang_subpkg nep Nepali +%lang_subpkg nld Dutch +%lang_subpkg nor Norwegian +%lang_subpkg ori Oriya +%lang_subpkg pan Panjabi +%lang_subpkg pol Polish +%lang_subpkg por Portuguese +%lang_subpkg pus Pashto +%lang_subpkg ron Romanian +%lang_subpkg rus Russian +%lang_subpkg san Sanskrit +%lang_subpkg sin Sinhala +%lang_subpkg slk Slovakian +%lang_subpkg slk_frak "Slovakian (Fraktur)" +%lang_subpkg slv Slovenian +%lang_subpkg spa Spanish +%lang_subpkg spa_old "Spanish (Old)" +%lang_subpkg sqi Albanian +%lang_subpkg srp Serbian +%lang_subpkg srp_latn "Serbian (Latin)" +%lang_subpkg swa Swahili +%lang_subpkg swe Swedish +%lang_subpkg syr Syriac +%lang_subpkg tam Tamil +%lang_subpkg tel Telugu +%lang_subpkg tgk Tajik +%lang_subpkg tgl Tagalog +%lang_subpkg tha Thai +%lang_subpkg tir Tigrinya +%lang_subpkg tur Turkish +%lang_subpkg uig Uyghur +%lang_subpkg ukr Ukrainian +%lang_subpkg urd Urdu +%lang_subpkg uzb Uzbek +%lang_subpkg uzb_cyrl "Uzbek (Cyrillic)" +%lang_subpkg vie Vietnamese +%lang_subpkg yid Yiddish + + %prep -%setup -q -a 1 +%setup -q -n 
%{name}-%{version} -a1 +%patch0 -p1 + %build -sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.* -sed -i 's/#include /#include \n#include /' viewer/svutil.cpp -%configure +autoreconf -ifv +%configure --disable-static + make %{?_smp_mflags} +make %{?_smp_mflags} training + %install -rm -rf $RPM_BUILD_ROOT -make install DESTDIR=$RPM_BUILD_ROOT -mkdir -p $RPM_BUILD_ROOT%{_datadir}/tesseract -mv $RPM_BUILD_ROOT%{_datadir}/tessdata $RPM_BUILD_ROOT%{_datadir}/tesseract -rm $RPM_BUILD_ROOT%{_datadir}/%{name}/tessdata/{deu,fra,ita,nld,spa}* +%make_install +%make_install training-install + +find %{buildroot}%{_libdir} -type f -name '*.la' -delete + +install -pm 0644 tessdata-%{version}/* %{buildroot}/%{_datadir}/%{name}/tessdata/ -%clean -rm -rf $RPM_BUILD_ROOT +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + %files -%defattr(-,root,root,-) -%{_bindir}/%{name} +%license COPYING +%doc AUTHORS ChangeLog NEWS README testing/eurotext.tif testing/phototest.tif +%{_bindir}/ambiguous_words +%{_bindir}/classifier_tester +%{_bindir}/combine_tessdata +%{_bindir}/dawg2wordlist +%{_bindir}/set_unicharset_properties +%{_bindir}/shapeclustering %{_bindir}/*training +%{_bindir}/%{name} +%{_bindir}/text2image %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg -%{_datadir}/%{name} - -%doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README +%dir %{_datadir}/%{name} +%dir %{_datadir}/%{name}/tessdata +%{_datadir}/%{name}/tessdata/configs/ +%{_datadir}/%{name}/tessdata/tessconfigs/ +%{_datadir}/%{name}/tessdata/eng.* +%{_datadir}/%{name}/tessdata/pdf.ttf +%{_libdir}/lib%{name}*.so.3* +%{_mandir}/man1/* +%{_mandir}/man5/* %files devel -%defattr(-,root,root,-) %{_includedir}/%{name} -%{_libdir}/lib%{name}* +%{_libdir}/lib%{name}*.so +%{_libdir}/pkgconfig/%{name}.pc + +%files osd +%{_datadir}/%{name}/tessdata/osd.traineddata + %changelog +* Sat Sep 12 2015 Sandro Mani - 3.04.00-1 +- Update to 3.04.00 + +* Fri Jun 19 2015 
Fedora Release Engineering - 3.03-0.6.rc1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild + +* Sat May 02 2015 Kalev Lember - 3.03-0.5.rc1 +- Rebuilt for GCC 5 C++11 ABI change + +* Mon Jan 26 2015 David Tardon - 3.03-0.4.rc1 +- rebuild for ICU 54.1 + +* Tue Aug 26 2014 David Tardon - 3.03-0.3.rc1 +- rebuild for ICU 53.1 + +* Mon Aug 18 2014 Fedora Release Engineering - 3.03-0.2.rc1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_22_Mass_Rebuild + +* Tue Aug 12 2014 Sandro Mani - 3.03-0.1.rc1 +- Update to v3.03-rc1 + +* Sun Jun 08 2014 Fedora Release Engineering - 3.02.02-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild + +* Thu Mar 27 2014 Karol Trzcionka - 3.02.02-3 +- Fix rhbz#1037350 (-Werror=format-security) +- Add OSD data +- Remove BuildRoot tag + +* Sun Aug 04 2013 Fedora Release Engineering - 3.02.02-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_20_Mass_Rebuild + +* Sat Apr 27 2013 Karol Trzcionka - 3.02.02-1 +- Update to v3.02.02 +- Apply pkgconfig patch rhbz#904806 + +* Fri Feb 15 2013 Fedora Release Engineering - 3.01-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Sat Oct 06 2012 Karol Trzcionka - 3.01-1 +- Update to v3.01 +- Add manual pages +- Add BRs leptonica, automake + +* Tue Jul 31 2012 Kevin Kofler - 3.00-6 +- Fix FTBFS with g++ 4.7 + +* Sat Jul 21 2012 Fedora Release Engineering - 3.00-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Tue Feb 28 2012 Fedora Release Engineering - 3.00-4 +- Rebuilt for c++ ABI breakage + +* Sat Jan 14 2012 Fedora Release Engineering - 3.00-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_17_Mass_Rebuild + +* Wed Feb 09 2011 Fedora Release Engineering - 3.00-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Tue Nov 16 2010 Karol Trzcionka - 3.00-1 +- Update to v3.00 +- Remove static libs and add dynamic + * Wed Oct 21 2009 Karol Trzcionka - 2.04-1 - Update to v2.04 - 
Add static libraries to -devel subpackage @@ -93,7 +316,7 @@ rm -rf $RPM_BUILD_ROOT - Including patch bases on cvs * Tue Feb 13 2007 Karol Trzcionka - 1.03-1 - Update to v1.03 -* Sat Jan 26 2007 Karol Trzcionka - 1.02-3 +* Sat Jan 27 2007 Karol Trzcionka - 1.02-3 - Update BRs - Fix x86_64 compile * Sat Dec 30 2006 Karol Trzcionka - 1.02-2 diff --git a/tesseract_datadir.patch b/tesseract_datadir.patch new file mode 100644 index 0000000..49f5509 --- /dev/null +++ b/tesseract_datadir.patch @@ -0,0 +1,37 @@ +diff -rupN tesseract-3.04.00/ccutil/Makefile.am tesseract-3.04.00-new/ccutil/Makefile.am +--- tesseract-3.04.00/ccutil/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/ccutil/Makefile.am 2015-09-12 19:10:31.983919381 +0200 +@@ -3,7 +3,7 @@ SUBDIRS = + AM_CXXFLAGS = + + if !NO_TESSDATA_PREFIX +-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/ ++AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/ + endif + + if VISIBILITY +diff -rupN tesseract-3.04.00/tessdata/configs/Makefile.am tesseract-3.04.00-new/tessdata/configs/Makefile.am +--- tesseract-3.04.00/tessdata/configs/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/configs/Makefile.am 2015-09-12 19:10:40.978587765 +0200 +@@ -1,3 +1,3 @@ +-datadir = @datadir@/tessdata/configs ++datadir = @datadir@/tesseract/tessdata/configs + data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram + EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram +diff -rupN tesseract-3.04.00/tessdata/Makefile.am tesseract-3.04.00-new/tessdata/Makefile.am +--- tesseract-3.04.00/tessdata/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/Makefile.am 2015-09-12 19:10:31.985919304 +0200 +@@ -1,4 +1,4 @@ +-datadir = @datadir@/tessdata ++datadir = 
@datadir@/tesseract/tessdata + + data_DATA = pdf.ttf + EXTRA_DIST = $(data_DATA) +diff -rupN tesseract-3.04.00/tessdata/tessconfigs/Makefile.am tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am +--- tesseract-3.04.00/tessdata/tessconfigs/Makefile.am 2015-07-11 09:53:12.000000000 +0200 ++++ tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am 2015-09-12 19:10:48.218340816 +0200 +@@ -1,3 +1,3 @@ +-datadir = @datadir@/tessdata/tessconfigs ++datadir = @datadir@/tesseract/tessdata/tessconfigs + data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo + EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo