Update to version 3.04.00

This commit is contained in:
Sandro Mani 2015-10-06 11:30:59 +02:00
parent f03fe441cb
commit e63c31a8f2
4 changed files with 209 additions and 48 deletions

2
.gitignore vendored
View File

@ -8,3 +8,5 @@ tesseract-2.00.eng.tar.gz
/tesseract-ocr-3.02.eng.tar.gz
/tesseract-ocr-3.01.osd.tar.gz
/tesseract-3.03-rc1.tar.gz
/tesseract-3.04.00.tar.gz
/tessdata-3.04.00.tar.gz

View File

@ -1,3 +1,2 @@
d69ceca9ae70e0b7020d0f92d60b8565 tesseract-3.03-rc1.tar.gz
683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz
3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz
078130b9c7d28c558a0e49d432505864 tesseract-3.04.00.tar.gz
b25e830d203af5c863081af3f684b53a tessdata-3.04.00.tar.gz

View File

@ -1,74 +1,196 @@
%global fullname tesseract-ocr
%global pre rc1
Name: tesseract
Version: 3.04.00
Release: 1%{?dist}
Summary: Raw OCR Engine
Name: tesseract
Version: 3.03
Release: 0.6%{?pre:.%pre}%{?dist}
Summary: Raw OCR Engine
License: ASL 2.0
URL: https://github.com/tesseract-ocr/%{name}
Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz
Source1: https://github.com/tesseract-ocr/tessdata/archive/%{version}.tar.gz#/tessdata-%{version}.tar.gz
Group: Applications/File
License: ASL 2.0
URL: http://code.google.com/p/%{fullname}/
# The downloads are now posted on google-drive which has impossible download URLS...
# The url of the drive is
# https://drive.google.com/folderview?id=0B7l10Bj_LprhQnpSRkpGMGV2eE0
Source0: %{name}-%{version}%{?pre:-%pre}.tar.gz
Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz
Source2: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.01.osd.tar.gz
BuildRequires: libtiff-devel
BuildRequires: leptonica-devel
BuildRequires: cairo-devel
BuildRequires: libicu-devel
BuildRequires: pango-devel
BuildRequires: automake libtool
Obsoletes: tesseract < 3.02.02
# Tweak location of tessdata folder
Patch0: tesseract_datadir.patch
%package devel
Summary: Development files for %{fullname}
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
BuildRequires: libtiff-devel
BuildRequires: leptonica-devel
BuildRequires: cairo-devel
BuildRequires: libicu-devel
BuildRequires: pango-devel
BuildRequires: automake libtool
%package osd
Summary: Orientation & Script Detection Data for %{fullname}
Group: Applications/File
Requires: %{name} = %{version}-%{release}
%description
A commercial quality OCR engine originally developed at HP between 1985 and
1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was
open-sourced by HP and UNLV in 2005.
%package devel
Summary: Development files for %{name}
Requires: %{name}%{?_isa} = %{version}-%{release}
%description devel
The %{name}-devel package contains header files for
developing applications that use %{name}.
%package osd
Summary: Orientation & Script Detection Data for %{name}
Requires: %{name}%{?_isa} = %{version}-%{release}
%description osd
Orientation & Script Detection Data for %{fullname}
Orientation & Script Detection Data for %{name}
# lang_subpkg <code> <language name>
# Declares one noarch "langpack-<code>" subpackage: its package header
# (summary plus a dependency on the exact version-release of the main package),
# its description, and a files list that picks up every file in the tessdata
# directory whose name is "<code>.<anything>".
%define lang_subpkg() \
%package langpack-%{1}\
Summary: %{2} language data for %{name}\
BuildArch: noarch\
Requires: %{name} = %{version}-%{release}\
\
%description langpack-%{1}\
%{2} language data for %{name}.\
\
%files langpack-%{1}\
%{_datadir}/%{name}/tessdata/%{1}.*
# see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# and https://en.wikipedia.org/wiki/ISO_639_macrolanguage
%lang_subpkg afr Afrikaans
%lang_subpkg amh Amharic
%lang_subpkg ara Arabic
%lang_subpkg asm Assamese
%lang_subpkg aze Azerbaijani
%lang_subpkg aze_cyrl "Azerbaijani (Cyrillic)"
%lang_subpkg bel Belarusian
%lang_subpkg ben Bengali
%lang_subpkg bod "Tibetan (Standard)"
%lang_subpkg bos Bosnian
%lang_subpkg bul Bulgarian
%lang_subpkg cat Catalan
%lang_subpkg ceb Cebuano
%lang_subpkg ces Czech
%lang_subpkg chi_sim "Chinese (Simplified)"
%lang_subpkg chi_tra "Chinese (Traditional)"
%lang_subpkg chr Cherokee
%lang_subpkg cym Welsh
%lang_subpkg dan Danish
%lang_subpkg dan_frak "Danish (Fraktur)"
%lang_subpkg deu German
%lang_subpkg deu_frak "German (Fraktur)"
%lang_subpkg dzo Dzongkha
%lang_subpkg ell Greek
%lang_subpkg enm "Middle English (1100-1500)"
%lang_subpkg epo Esperanto
%lang_subpkg equ "Math / equation"
%lang_subpkg est Estonian
%lang_subpkg eus Basque
%lang_subpkg fas "Persian (Farsi)"
%lang_subpkg fin Finnish
%lang_subpkg fra French
# The frk data set is trained on Fraktur script; it is not data for the
# Frankish language that the ISO 639-3 code "frk" denotes.
%lang_subpkg frk Fraktur
%lang_subpkg frm "Middle French (ca. 1400-1600)"
%lang_subpkg gle Irish
%lang_subpkg glg Galician
%lang_subpkg grc "Ancient Greek"
%lang_subpkg guj Gujarati
%lang_subpkg hat Haitian
%lang_subpkg heb Hebrew
%lang_subpkg hin Hindi
%lang_subpkg hrv Croatian
%lang_subpkg hun Hungarian
%lang_subpkg iku Inuktitut
%lang_subpkg ind Indonesian
%lang_subpkg isl Icelandic
%lang_subpkg ita Italian
%lang_subpkg ita_old "Italian (Old)"
%lang_subpkg jav Javanese
%lang_subpkg jpn Japanese
%lang_subpkg kan Kannada
%lang_subpkg kat Georgian
%lang_subpkg kat_old "Georgian (Old)"
%lang_subpkg kaz Kazakh
%lang_subpkg khm Khmer
%lang_subpkg kir Kyrgyz
%lang_subpkg kor Korean
%lang_subpkg kur Kurdish
%lang_subpkg lao Lao
%lang_subpkg lat Latin
%lang_subpkg lav Latvian
%lang_subpkg lit Lithuanian
%lang_subpkg mal Malayalam
%lang_subpkg mar Marathi
%lang_subpkg mkd Macedonian
%lang_subpkg mlt Maltese
%lang_subpkg msa Malay
%lang_subpkg mya Burmese
%lang_subpkg nep Nepali
%lang_subpkg nld Dutch
%lang_subpkg nor Norwegian
%lang_subpkg ori Oriya
%lang_subpkg pan Panjabi
%lang_subpkg pol Polish
%lang_subpkg por Portuguese
%lang_subpkg pus Pashto
%lang_subpkg ron Romanian
%lang_subpkg rus Russian
%lang_subpkg san Sanskrit
%lang_subpkg sin Sinhala
%lang_subpkg slk Slovakian
%lang_subpkg slk_frak "Slovakian (Fraktur)"
%lang_subpkg slv Slovenian
%lang_subpkg spa Spanish
%lang_subpkg spa_old "Spanish (Old)"
%lang_subpkg sqi Albanian
%lang_subpkg srp Serbian
%lang_subpkg srp_latn "Serbian (Latin)"
%lang_subpkg swa Swahili
%lang_subpkg swe Swedish
%lang_subpkg syr Syriac
%lang_subpkg tam Tamil
%lang_subpkg tel Telugu
%lang_subpkg tgk Tajik
%lang_subpkg tgl Tagalog
%lang_subpkg tha Thai
%lang_subpkg tir Tigrinya
%lang_subpkg tur Turkish
%lang_subpkg uig Uyghur
%lang_subpkg ukr Ukrainian
%lang_subpkg urd Urdu
%lang_subpkg uzb Uzbek
%lang_subpkg uzb_cyrl "Uzbek (Cyrillic)"
%lang_subpkg vie Vietnamese
%lang_subpkg yid Yiddish
%prep
%setup -q -n %{name}-%{version} -a1 -a2
%setup -q -n %{name}-%{version} -a1
%patch0 -p1
%build
sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.*
autoreconf -ifv
%configure --disable-static
make %{?_smp_mflags}
# Remove compiled files, see https://groups.google.com/forum/#!topic/tesseract-dev/ARKOSV3zpWo
make -C training clean
make %{?_smp_mflags} training
%install
%make_install
%make_install training-install
rm -f %{buildroot}%{_libdir}/*la
mkdir -p %{buildroot}%{_datadir}/%{name}
mv %{buildroot}%{_datadir}/tessdata %{buildroot}%{_datadir}/%{name}
install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
find %{buildroot}%{_libdir} -type f -name '*.la' -delete
install -pm 0644 tessdata-%{version}/* %{buildroot}/%{_datadir}/%{name}/tessdata/
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
%files
%license COPYING
%doc AUTHORS ChangeLog NEWS README testing/eurotext.tif testing/phototest.tif
%{_bindir}/ambiguous_words
%{_bindir}/classifier_tester
%{_bindir}/combine_tessdata
@ -82,17 +204,14 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
%{_bindir}/wordlist2dawg
%dir %{_datadir}/%{name}
%dir %{_datadir}/%{name}/tessdata
%{_datadir}/%{name}/tessdata/configs
%{_datadir}/%{name}/tessdata/tessconfigs
%{_datadir}/%{name}/tessdata/configs/
%{_datadir}/%{name}/tessdata/tessconfigs/
%{_datadir}/%{name}/tessdata/eng.*
%{_datadir}/%{name}/tessdata/pdf.ttf
%{_datadir}/%{name}/tessdata/pdf.ttx
%{_libdir}/lib%{name}*.so.*
%{_libdir}/lib%{name}*.so.3*
%{_mandir}/man1/*
%{_mandir}/man5/*
%doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README
%files devel
%{_includedir}/%{name}
%{_libdir}/lib%{name}*.so
@ -101,7 +220,11 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
%files osd
%{_datadir}/%{name}/tessdata/osd.traineddata
%changelog
* Sat Sep 12 2015 Sandro Mani <manisandro@gmail.com> - 3.04.00-1
- Update to 3.04.00
* Fri Jun 19 2015 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 3.03-0.6.rc1
- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild

37
tesseract_datadir.patch Normal file
View File

@ -0,0 +1,37 @@
diff -rupN tesseract-3.04.00/ccutil/Makefile.am tesseract-3.04.00-new/ccutil/Makefile.am
--- tesseract-3.04.00/ccutil/Makefile.am 2015-07-11 09:53:12.000000000 +0200
+++ tesseract-3.04.00-new/ccutil/Makefile.am 2015-09-12 19:10:31.983919381 +0200
@@ -3,7 +3,7 @@ SUBDIRS =
AM_CXXFLAGS =
if !NO_TESSDATA_PREFIX
-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/
+AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/
endif
if VISIBILITY
diff -rupN tesseract-3.04.00/tessdata/configs/Makefile.am tesseract-3.04.00-new/tessdata/configs/Makefile.am
--- tesseract-3.04.00/tessdata/configs/Makefile.am 2015-07-11 09:53:12.000000000 +0200
+++ tesseract-3.04.00-new/tessdata/configs/Makefile.am 2015-09-12 19:10:40.978587765 +0200
@@ -1,3 +1,3 @@
-datadir = @datadir@/tessdata/configs
+datadir = @datadir@/tesseract/tessdata/configs
data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram
EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram
diff -rupN tesseract-3.04.00/tessdata/Makefile.am tesseract-3.04.00-new/tessdata/Makefile.am
--- tesseract-3.04.00/tessdata/Makefile.am 2015-07-11 09:53:12.000000000 +0200
+++ tesseract-3.04.00-new/tessdata/Makefile.am 2015-09-12 19:10:31.985919304 +0200
@@ -1,4 +1,4 @@
-datadir = @datadir@/tessdata
+datadir = @datadir@/tesseract/tessdata
data_DATA = pdf.ttf
EXTRA_DIST = $(data_DATA)
diff -rupN tesseract-3.04.00/tessdata/tessconfigs/Makefile.am tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am
--- tesseract-3.04.00/tessdata/tessconfigs/Makefile.am 2015-07-11 09:53:12.000000000 +0200
+++ tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am 2015-09-12 19:10:48.218340816 +0200
@@ -1,3 +1,3 @@
-datadir = @datadir@/tessdata/tessconfigs
+datadir = @datadir@/tesseract/tessdata/tessconfigs
data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo
EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo