Update to version 3.04.00
This commit is contained in:
parent
f03fe441cb
commit
e63c31a8f2
2
.gitignore
vendored
2
.gitignore
vendored
@ -8,3 +8,5 @@ tesseract-2.00.eng.tar.gz
|
|||||||
/tesseract-ocr-3.02.eng.tar.gz
|
/tesseract-ocr-3.02.eng.tar.gz
|
||||||
/tesseract-ocr-3.01.osd.tar.gz
|
/tesseract-ocr-3.01.osd.tar.gz
|
||||||
/tesseract-3.03-rc1.tar.gz
|
/tesseract-3.03-rc1.tar.gz
|
||||||
|
/tesseract-3.04.00.tar.gz
|
||||||
|
/tessdata-3.04.00.tar.gz
|
||||||
|
5
sources
5
sources
@ -1,3 +1,2 @@
|
|||||||
d69ceca9ae70e0b7020d0f92d60b8565 tesseract-3.03-rc1.tar.gz
|
078130b9c7d28c558a0e49d432505864 tesseract-3.04.00.tar.gz
|
||||||
683486e01f5b87c17f2f5815f770ccb3 tesseract-ocr-3.01.osd.tar.gz
|
b25e830d203af5c863081af3f684b53a tessdata-3.04.00.tar.gz
|
||||||
3562250fe6f4e76229a329166b8ae853 tesseract-ocr-3.02.eng.tar.gz
|
|
||||||
|
199
tesseract.spec
199
tesseract.spec
@ -1,74 +1,196 @@
|
|||||||
%global fullname tesseract-ocr
|
|
||||||
%global pre rc1
|
|
||||||
|
|
||||||
Name: tesseract
|
Name: tesseract
|
||||||
Version: 3.03
|
Version: 3.04.00
|
||||||
Release: 0.6%{?pre:.%pre}%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Raw OCR Engine
|
Summary: Raw OCR Engine
|
||||||
|
|
||||||
Group: Applications/File
|
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
URL: http://code.google.com/p/%{fullname}/
|
URL: https://github.com/tesseract-ocr/%{name}
|
||||||
# The downloads are now posted on google-drive which has impossible download URLS...
|
Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz
|
||||||
# The url of the drive is
|
Source1: https://github.com/tesseract-ocr/tessdata/archive/%{version}.tar.gz#/tessdata-%{version}.tar.gz
|
||||||
# https://drive.google.com/folderview?id=0B7l10Bj_LprhQnpSRkpGMGV2eE0
|
|
||||||
Source0: %{name}-%{version}%{?pre:-%pre}.tar.gz
|
# Tweak location of tessdata folder
|
||||||
Source1: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.02.eng.tar.gz
|
Patch0: tesseract_datadir.patch
|
||||||
Source2: http://tesseract-ocr.googlecode.com/files/%{fullname}-3.01.osd.tar.gz
|
|
||||||
BuildRequires: libtiff-devel
|
BuildRequires: libtiff-devel
|
||||||
BuildRequires: leptonica-devel
|
BuildRequires: leptonica-devel
|
||||||
BuildRequires: cairo-devel
|
BuildRequires: cairo-devel
|
||||||
BuildRequires: libicu-devel
|
BuildRequires: libicu-devel
|
||||||
BuildRequires: pango-devel
|
BuildRequires: pango-devel
|
||||||
BuildRequires: automake libtool
|
BuildRequires: automake libtool
|
||||||
Obsoletes: tesseract < 3.02.02
|
|
||||||
|
|
||||||
%package devel
|
|
||||||
Summary: Development files for %{fullname}
|
|
||||||
Group: Development/Libraries
|
|
||||||
Requires: %{name} = %{version}-%{release}
|
|
||||||
|
|
||||||
%package osd
|
|
||||||
Summary: Orientation & Script Detection Data for %{fullname}
|
|
||||||
Group: Applications/File
|
|
||||||
Requires: %{name} = %{version}-%{release}
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
A commercial quality OCR engine originally developed at HP between 1985 and
|
A commercial quality OCR engine originally developed at HP between 1985 and
|
||||||
1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was
|
1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was
|
||||||
open-sourced by HP and UNLV in 2005.
|
open-sourced by HP and UNLV in 2005.
|
||||||
|
|
||||||
|
|
||||||
|
%package devel
|
||||||
|
Summary: Development files for %{name}
|
||||||
|
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||||
|
|
||||||
%description devel
|
%description devel
|
||||||
The %{name}-devel package contains header files for
|
The %{name}-devel package contains header files for
|
||||||
developing applications that use %{name}.
|
developing applications that use %{name}.
|
||||||
|
|
||||||
|
|
||||||
|
%package osd
|
||||||
|
Summary: Orientation & Script Detection Data for %{name}
|
||||||
|
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||||
|
|
||||||
%description osd
|
%description osd
|
||||||
Orientation & Script Detection Data for %{fullname}
|
Orientation & Script Detection Data for %{name}
|
||||||
|
|
||||||
|
%define lang_subpkg() \
|
||||||
|
%package langpack-%{1}\
|
||||||
|
Summary: %{2} language data for %{name}\
|
||||||
|
BuildArch: noarch\
|
||||||
|
Requires: %{name} = %{version}-%{release}\
|
||||||
|
\
|
||||||
|
%description langpack-%{1}\
|
||||||
|
%{2} language data for %{name}.\
|
||||||
|
\
|
||||||
|
%files langpack-%{1}\
|
||||||
|
%{_datadir}/%{name}/tessdata/%{1}.*
|
||||||
|
|
||||||
|
# see https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
|
||||||
|
# and https://en.wikipedia.org/wiki/ISO_639_macrolanguage
|
||||||
|
%lang_subpkg afr Afrikaans
|
||||||
|
%lang_subpkg amh Amharic
|
||||||
|
%lang_subpkg ara Arabic
|
||||||
|
%lang_subpkg asm Assamese
|
||||||
|
%lang_subpkg aze Azerbaijani
|
||||||
|
%lang_subpkg aze_cyrl "Azerbaijani (Cyrillic)"
|
||||||
|
%lang_subpkg bel Belarusian
|
||||||
|
%lang_subpkg ben Bengali
|
||||||
|
%lang_subpkg bod "Tibetan (Standard)"
|
||||||
|
%lang_subpkg bos Bosnian
|
||||||
|
%lang_subpkg bul Bulgarian
|
||||||
|
%lang_subpkg cat Catalan
|
||||||
|
%lang_subpkg ceb Cebuano
|
||||||
|
%lang_subpkg ces Czech
|
||||||
|
%lang_subpkg chi_sim "Chinese (Simplified)"
|
||||||
|
%lang_subpkg chi_tra "Chinese (Traditional)"
|
||||||
|
%lang_subpkg chr Cherokee
|
||||||
|
%lang_subpkg cym Welsh
|
||||||
|
%lang_subpkg dan Danish
|
||||||
|
%lang_subpkg dan_frak "Danish (Fraktur)"
|
||||||
|
%lang_subpkg deu German
|
||||||
|
%lang_subpkg deu_frak "German (Fraktur)"
|
||||||
|
%lang_subpkg dzo Dzongkha
|
||||||
|
%lang_subpkg ell Greek
|
||||||
|
%lang_subpkg enm "Middle English (1100-1500)"
|
||||||
|
%lang_subpkg epo Esperanto
|
||||||
|
%lang_subpkg equ "Math / equation"
|
||||||
|
%lang_subpkg est Estonian
|
||||||
|
%lang_subpkg eus Basque
|
||||||
|
%lang_subpkg fas "Persian (Farsi)"
|
||||||
|
%lang_subpkg fin Finnish
|
||||||
|
%lang_subpkg fra French
|
||||||
|
%lang_subpkg frk Frankish
|
||||||
|
%lang_subpkg frm "Middle French (ca. 1400-1600)"
|
||||||
|
%lang_subpkg gle Irish
|
||||||
|
%lang_subpkg glg Galician
|
||||||
|
%lang_subpkg grc "Ancient Greek"
|
||||||
|
%lang_subpkg guj Gujarati
|
||||||
|
%lang_subpkg hat Haitian
|
||||||
|
%lang_subpkg heb Hebrew
|
||||||
|
%lang_subpkg hin Hindi
|
||||||
|
%lang_subpkg hrv Croatian
|
||||||
|
%lang_subpkg hun Hungarian
|
||||||
|
%lang_subpkg iku Inuktitut
|
||||||
|
%lang_subpkg ind Indonesian
|
||||||
|
%lang_subpkg isl Icelandic
|
||||||
|
%lang_subpkg ita Italian
|
||||||
|
%lang_subpkg ita_old "Italian (Old)"
|
||||||
|
%lang_subpkg jav Javanese
|
||||||
|
%lang_subpkg jpn Japanese
|
||||||
|
%lang_subpkg kan Kannada
|
||||||
|
%lang_subpkg kat Georgian
|
||||||
|
%lang_subpkg kat_old "Georgian (Old)"
|
||||||
|
%lang_subpkg kaz Kazakh
|
||||||
|
%lang_subpkg khm Khmer
|
||||||
|
%lang_subpkg kir Kyrgyz
|
||||||
|
%lang_subpkg kor Korean
|
||||||
|
%lang_subpkg kur Kurdish
|
||||||
|
%lang_subpkg lao Lao
|
||||||
|
%lang_subpkg lat Latin
|
||||||
|
%lang_subpkg lav Latvian
|
||||||
|
%lang_subpkg lit Lithuanian
|
||||||
|
%lang_subpkg mal Malayalam
|
||||||
|
%lang_subpkg mar Marathi
|
||||||
|
%lang_subpkg mkd Macedonian
|
||||||
|
%lang_subpkg mlt Maltese
|
||||||
|
%lang_subpkg msa Malay
|
||||||
|
%lang_subpkg mya Burmese
|
||||||
|
%lang_subpkg nep Nepali
|
||||||
|
%lang_subpkg nld Dutch
|
||||||
|
%lang_subpkg nor Norwegian
|
||||||
|
%lang_subpkg ori Oriya
|
||||||
|
%lang_subpkg pan Panjabi
|
||||||
|
%lang_subpkg pol Polish
|
||||||
|
%lang_subpkg por Portuguese
|
||||||
|
%lang_subpkg pus Pashto
|
||||||
|
%lang_subpkg ron Romanian
|
||||||
|
%lang_subpkg rus Russian
|
||||||
|
%lang_subpkg san Sanskrit
|
||||||
|
%lang_subpkg sin Sinhala
|
||||||
|
%lang_subpkg slk Slovakian
|
||||||
|
%lang_subpkg slk_frak "Slovakian (Fraktur)"
|
||||||
|
%lang_subpkg slv Slovenian
|
||||||
|
%lang_subpkg spa Spanish
|
||||||
|
%lang_subpkg spa_old "Spanish (Old)"
|
||||||
|
%lang_subpkg sqi Albanian
|
||||||
|
%lang_subpkg srp Serbian
|
||||||
|
%lang_subpkg srp_latn "Serbian (Latin)"
|
||||||
|
%lang_subpkg swa Swahili
|
||||||
|
%lang_subpkg swe Swedish
|
||||||
|
%lang_subpkg syr Syriac
|
||||||
|
%lang_subpkg tam Tamil
|
||||||
|
%lang_subpkg tel Telugu
|
||||||
|
%lang_subpkg tgk Tajik
|
||||||
|
%lang_subpkg tgl Tagalog
|
||||||
|
%lang_subpkg tha Thai
|
||||||
|
%lang_subpkg tir Tigrinya
|
||||||
|
%lang_subpkg tur Turkish
|
||||||
|
%lang_subpkg uig Uyghur
|
||||||
|
%lang_subpkg ukr Ukrainian
|
||||||
|
%lang_subpkg urd Urdu
|
||||||
|
%lang_subpkg uzb Uzbek
|
||||||
|
%lang_subpkg uzb_cyrl "Uzbek (Cyrillic)"
|
||||||
|
%lang_subpkg vie Vietnamese
|
||||||
|
%lang_subpkg yid Yiddish
|
||||||
|
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n %{name}-%{version} -a1 -a2
|
%setup -q -n %{name}-%{version} -a1
|
||||||
|
%patch0 -p1
|
||||||
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
sed -i 's#-DTESSDATA_PREFIX=@datadir@/#-DTESSDATA_PREFIX=@datadir@/%{name}/##' ccutil/Makefile.*
|
|
||||||
autoreconf -ifv
|
autoreconf -ifv
|
||||||
%configure --disable-static
|
%configure --disable-static
|
||||||
|
|
||||||
make %{?_smp_mflags}
|
make %{?_smp_mflags}
|
||||||
# Remove compiled files, see https://groups.google.com/forum/#!topic/tesseract-dev/ARKOSV3zpWo
|
|
||||||
make -C training clean
|
|
||||||
make %{?_smp_mflags} training
|
make %{?_smp_mflags} training
|
||||||
|
|
||||||
|
|
||||||
%install
|
%install
|
||||||
%make_install
|
%make_install
|
||||||
%make_install training-install
|
%make_install training-install
|
||||||
rm -f %{buildroot}%{_libdir}/*la
|
|
||||||
mkdir -p %{buildroot}%{_datadir}/%{name}
|
find %{buildroot}%{_libdir} -type f -name '*.la' -delete
|
||||||
mv %{buildroot}%{_datadir}/tessdata %{buildroot}%{_datadir}/%{name}
|
|
||||||
install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
|
install -pm 0644 tessdata-%{version}/* %{buildroot}/%{_datadir}/%{name}/tessdata/
|
||||||
|
|
||||||
|
|
||||||
%post -p /sbin/ldconfig
|
%post -p /sbin/ldconfig
|
||||||
%postun -p /sbin/ldconfig
|
%postun -p /sbin/ldconfig
|
||||||
|
|
||||||
|
|
||||||
%files
|
%files
|
||||||
|
%license COPYING
|
||||||
|
%doc AUTHORS ChangeLog NEWS README testing/eurotext.tif testing/phototest.tif
|
||||||
%{_bindir}/ambiguous_words
|
%{_bindir}/ambiguous_words
|
||||||
%{_bindir}/classifier_tester
|
%{_bindir}/classifier_tester
|
||||||
%{_bindir}/combine_tessdata
|
%{_bindir}/combine_tessdata
|
||||||
@ -82,17 +204,14 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
|
|||||||
%{_bindir}/wordlist2dawg
|
%{_bindir}/wordlist2dawg
|
||||||
%dir %{_datadir}/%{name}
|
%dir %{_datadir}/%{name}
|
||||||
%dir %{_datadir}/%{name}/tessdata
|
%dir %{_datadir}/%{name}/tessdata
|
||||||
%{_datadir}/%{name}/tessdata/configs
|
%{_datadir}/%{name}/tessdata/configs/
|
||||||
%{_datadir}/%{name}/tessdata/tessconfigs
|
%{_datadir}/%{name}/tessdata/tessconfigs/
|
||||||
%{_datadir}/%{name}/tessdata/eng.*
|
%{_datadir}/%{name}/tessdata/eng.*
|
||||||
%{_datadir}/%{name}/tessdata/pdf.ttf
|
%{_datadir}/%{name}/tessdata/pdf.ttf
|
||||||
%{_datadir}/%{name}/tessdata/pdf.ttx
|
%{_libdir}/lib%{name}*.so.3*
|
||||||
%{_libdir}/lib%{name}*.so.*
|
|
||||||
%{_mandir}/man1/*
|
%{_mandir}/man1/*
|
||||||
%{_mandir}/man5/*
|
%{_mandir}/man5/*
|
||||||
|
|
||||||
%doc AUTHORS ChangeLog COPYING eurotext.tif NEWS phototest.tif README
|
|
||||||
|
|
||||||
%files devel
|
%files devel
|
||||||
%{_includedir}/%{name}
|
%{_includedir}/%{name}
|
||||||
%{_libdir}/lib%{name}*.so
|
%{_libdir}/lib%{name}*.so
|
||||||
@ -101,7 +220,11 @@ install -m 0644 %{fullname}/tessdata/* %{buildroot}%{_datadir}/%{name}/tessdata
|
|||||||
%files osd
|
%files osd
|
||||||
%{_datadir}/%{name}/tessdata/osd.traineddata
|
%{_datadir}/%{name}/tessdata/osd.traineddata
|
||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Sat Sep 12 2015 Sandro Mani <manisandro@gmail.com> - 3.04.00-1
|
||||||
|
- Update to 3.04.00
|
||||||
|
|
||||||
* Fri Jun 19 2015 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 3.03-0.6.rc1
|
* Fri Jun 19 2015 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 3.03-0.6.rc1
|
||||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_23_Mass_Rebuild
|
||||||
|
|
||||||
|
37
tesseract_datadir.patch
Normal file
37
tesseract_datadir.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
diff -rupN tesseract-3.04.00/ccutil/Makefile.am tesseract-3.04.00-new/ccutil/Makefile.am
|
||||||
|
--- tesseract-3.04.00/ccutil/Makefile.am 2015-07-11 09:53:12.000000000 +0200
|
||||||
|
+++ tesseract-3.04.00-new/ccutil/Makefile.am 2015-09-12 19:10:31.983919381 +0200
|
||||||
|
@@ -3,7 +3,7 @@ SUBDIRS =
|
||||||
|
AM_CXXFLAGS =
|
||||||
|
|
||||||
|
if !NO_TESSDATA_PREFIX
|
||||||
|
-AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/
|
||||||
|
+AM_CXXFLAGS += -DTESSDATA_PREFIX=@datadir@/tesseract/
|
||||||
|
endif
|
||||||
|
|
||||||
|
if VISIBILITY
|
||||||
|
diff -rupN tesseract-3.04.00/tessdata/configs/Makefile.am tesseract-3.04.00-new/tessdata/configs/Makefile.am
|
||||||
|
--- tesseract-3.04.00/tessdata/configs/Makefile.am 2015-07-11 09:53:12.000000000 +0200
|
||||||
|
+++ tesseract-3.04.00-new/tessdata/configs/Makefile.am 2015-09-12 19:10:40.978587765 +0200
|
||||||
|
@@ -1,3 +1,3 @@
|
||||||
|
-datadir = @datadir@/tessdata/configs
|
||||||
|
+datadir = @datadir@/tesseract/tessdata/configs
|
||||||
|
data_DATA = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram
|
||||||
|
EXTRA_DIST = inter makebox box.train unlv ambigs.train api_config kannada box.train.stderr quiet logfile digits hocr linebox pdf rebox strokewidth bigram
|
||||||
|
diff -rupN tesseract-3.04.00/tessdata/Makefile.am tesseract-3.04.00-new/tessdata/Makefile.am
|
||||||
|
--- tesseract-3.04.00/tessdata/Makefile.am 2015-07-11 09:53:12.000000000 +0200
|
||||||
|
+++ tesseract-3.04.00-new/tessdata/Makefile.am 2015-09-12 19:10:31.985919304 +0200
|
||||||
|
@@ -1,4 +1,4 @@
|
||||||
|
-datadir = @datadir@/tessdata
|
||||||
|
+datadir = @datadir@/tesseract/tessdata
|
||||||
|
|
||||||
|
data_DATA = pdf.ttf
|
||||||
|
EXTRA_DIST = $(data_DATA)
|
||||||
|
diff -rupN tesseract-3.04.00/tessdata/tessconfigs/Makefile.am tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am
|
||||||
|
--- tesseract-3.04.00/tessdata/tessconfigs/Makefile.am 2015-07-11 09:53:12.000000000 +0200
|
||||||
|
+++ tesseract-3.04.00-new/tessdata/tessconfigs/Makefile.am 2015-09-12 19:10:48.218340816 +0200
|
||||||
|
@@ -1,3 +1,3 @@
|
||||||
|
-datadir = @datadir@/tessdata/tessconfigs
|
||||||
|
+datadir = @datadir@/tesseract/tessdata/tessconfigs
|
||||||
|
data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo
|
||||||
|
EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo
|
Loading…
Reference in New Issue
Block a user