# BEGIN SourceDeps(oneline):
BuildRequires: perl(CPAN.pm) perl(Carp.pm) perl(Config.pm) perl(Cwd.pm) perl(Encode.pm) perl(ExtUtils/MakeMaker.pm) perl(Fcntl.pm) perl(File/Basename.pm) perl(File/Find.pm) perl(File/ShareDir.pm) perl(FileHandle.pm) perl(IO/File.pm) perl(IO/Uncompress/Gunzip.pm) perl(JSON.pm) perl(LWP/UserAgent.pm) perl(List/MoreUtils.pm) perl(Module/Build.pm) perl(Net/FTP.pm) perl(Parse/CPAN/Meta.pm) perl(Pod/Usage.pm) perl(Socket.pm) perl(Test/Deep.pm) perl(Test/Exception.pm) perl(Test/Number/Delta.pm) perl(Unicode/Normalize.pm) perl(YAML/Tiny.pm) perl(base.pm) perl(open.pm) perl(parent.pm)
# END SourceDeps(oneline)
# CPAN distribution name; reused below for the package name, the URL, the
# source tarball name and the unpacked source directory.
%define module_name Lingua-RU-OpenCorpora-Tokenizer
# Fail the build if the install step leaves files that no files section claims.
%define _unpackaged_files_terminate_build 1
# Packaging toolchain, needed in addition to the Perl modules listed in the
# SourceDeps block above.
BuildRequires: rpm-build-perl perl-devel perl-podlators

Name: perl-%module_name
Version: 0.06
Release: alt1
Summary: tokenizer for OpenCorpora project
Group: Development/Perl
License: perl
# NOTE(review): the CPAN macro is not defined in this file; presumably it comes
# from the distribution's rpm macro set and expands to a CPAN URL - confirm.
Url: %CPAN %module_name

# Upstream release tarball; its file name is derived from module_name and Version.
Source0: http://mirror.yandex.ru/mirrors/cpan/authors/id/K/KS/KSURI/%{module_name}-%{version}.tar.gz
# The resulting packages are architecture-independent.
BuildArch: noarch

%description
This module tokenizes input texts in the Russian language.

Note that it uses a probabilistic algorithm rather than trying to parse the language. It also uses some pre-calculated data freely provided by the OpenCorpora project.

NOTE: OpenCorpora periodically provides updates for this data. Check out the `opencorpora-update-tokenizer' script that comes with this distribution.

The algorithm is this:

1. Split text into chars.

2. Iterate over the chars from left to right.

3. For every char get its context (see the CONTEXT section of the module documentation).

4. Find probability for the context in vectors file (see the VECTORS FILE section of the module documentation) or use the default value - 0.5.

%package scripts
# Companion subpackage; its contents are listed in the "files scripts" section.
# It depends on the main package with a matching EVR value.
# NOTE(review): EVR is not defined in this file; presumably the
# epoch/version/release of this build - confirm.
Summary: %module_name scripts
Group: Development/Perl
Requires: %name = %EVR

%description scripts
scripts for %module_name

%prep
# Unpack the source tarball quietly (-q); -n gives the name of the top-level
# directory inside it, built from module_name and Version.
%setup -q -n %{module_name}-%{version}

%build
# NOTE(review): perl_vendor_build is not defined in this file; presumably it is
# the rpm-build-perl macro that configures and builds the module - confirm.
%perl_vendor_build

%install
# NOTE(review): perl_vendor_install is not defined in this file; presumably it
# is the rpm-build-perl macro that installs the built module into the
# buildroot - confirm.
%perl_vendor_install

%files
# Main package: the Changelog and README documents, plus entries under the
# perl_vendor_privlib directory whose names start with "L" and everything
# under its auto/ subdirectory.
%doc Changelog README
%perl_vendor_privlib/L*
%perl_vendor_privlib/auto/*

%files scripts
# Scripts subpackage: everything installed into the binary directory and the
# section-1 man page directory.
%_bindir/*
%_man1dir/*

%changelog
