# Package preamble for textcat: identity tags, upstream sources, and build
# dependencies.  The Group tag below applies by default; an el5-only Group
# tag further down is emitted only when the el5 macro is defined.
Group: Other
# Redefine _localstatedir to the value of _var.
# see https://bugzilla.altlinux.org/show_bug.cgi?id=10382
%define _localstatedir %{_var}
Name:			textcat
Version:		1.10
Release:		alt2_23
Summary:		Written language identification
# Emitted only when the el5 macro is defined.
%{?el5:Group:		Applications/Text}

# Automatically converted from old format: LGPLv2+ - review is highly recommended.
License:		LicenseRef-Callaway-LGPLv2+
URL:			http://www.let.rug.nl/~vannoord/TextCat/
# Source0 is the upstream tarball; Source1 is the upstream PDF, which the
# prep section copies into the build tree.
Source0:		http://www.let.rug.nl/~vannoord/TextCat/text_cat.tgz
Source1:		http://www.let.rug.nl/~vannoord/TextCat/%{name}.pdf

# Perl toolchain and modules required at build time.
# NOTE(review): presumably these are the modules the text_cat script loads,
# needed because the check section runs the script - confirm.
BuildRequires:		perl-devel
BuildRequires:		rpm-build-perl
BuildRequires:		perl(Benchmark.pm)
BuildRequires:		perl(Getopt/Std.pm)
BuildRequires:		perl(strict.pm)
BuildRequires:		perl(vars.pm)

BuildArch:		noarch
# Emitted only when the el5 macro is defined: BuildRoot becomes a unique path
# under _var/tmp.  mktemp -ud only generates a directory name; it does not
# create the directory.
%{?el5:BuildRoot:	%(mktemp -ud %{_var}/tmp/%{name}-%{version}-%{release}-XXXXXX)}
# NOTE(review): import.info is presumably metadata left by the automated
# package import - confirm.
Source44: import.info

%description
TextCat is an implementation of the text categorization algorithm
presented in Cavnar, W. B. and J. M. Trenkle, "N-Gram-Based Text
Categorization".  TextCat uses this technique to implement
written language identification.  At the moment, it knows about 69
natural languages (counting Esperanto as a natural language).


%prep
# Unpack Source0 quietly (-q) into a build directory that is created
# first (-c) instead of relying on a top-level directory in the tarball.
%setup -q -c
# Bring the PDF from Source1 into the build tree; the files section lists
# it as documentation.
cp -a %{SOURCE1} ./


%build
# Copy text_cat to a file named after the package, dropping the first line
# only when that line is a "#!" interpreter line.
sed	-e '1{/^#!.*/d}' < text_cat > %{name}
# Prepend a "#!/usr/bin/perl -w" line and rewrite the hard-coded upstream
# LM path to the lm directory under the package data directory.
# This has to stay a separate sed pass: once line 1 has been deleted, a
# "1s" command in the same pass would never see the new first line.
sed -i	-e '1s~^~#!/usr/bin/perl -w\n~'						\
	-e 's!/users1/vannoord/Perl/TextCat/LM!%{_datadir}/%{name}/lm!g'	\
	%{name}
# Give the generated script the same timestamp as the original text_cat.
touch	-r text_cat %{name}


%install
# Executed only when the el5 macro is defined: remove any existing buildroot.
%{?el5:rm -rf %{buildroot}}
# Create the two destination directories: one for the executable and one
# for the LM data files.
mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_datadir}/%{name}/lm
# Install the generated script as an executable and the LM files as plain
# data; -p preserves the timestamps of the source files.
install -p -m 0755 %{name} %{buildroot}%{_bindir}/
install -p -m 0644 LM/* %{buildroot}%{_datadir}/%{name}/lm/


%check
# Produce a test copy of the script whose lm path is prefixed with the
# buildroot, so it reads the files placed there by the install section.
sed -e 's!%{_datadir}/%{name}/lm!%{buildroot}&!g' %{name} > %{name}_test
# Run the test copy once for each .txt file found under ShortTexts, in
# sorted order.
for sample in $(find ShortTexts/ -name '*.txt' | sort -u); do
  %{__perl} -w %{name}_test "${sample}"
done



%files
# Documentation taken from the build directory: upstream text files first,
# then the PDF copied in during prep.
%doc CHANGES COPYING Copyright README
%doc %{name}.pdf
# The executable script.
%{_bindir}/%{name}
# The package data directory and everything under it, including lm.
%{_datadir}/%{name}


%changelog
