#!/usr/bin/perl
#
# ngrams - compute the n-gram frequencies of the input files (or of standard
# input) and print the resulting tables to standard output.
#
# The user documentation is the POD section after __END__.

use strict;
use warnings;

use Getopt::Long;
use Text::Ngrams;

# Version number derived from the RCS/CVS revision keyword.
our $VERSION = sprintf "%d.%d", q$Revision: 1.3 $ =~ /(\d+)/g;

# Command-line settings and their defaults.
my $n                = 3;              # --n: n-gram size
my $type             = 'character';    # --type: handed to Text::Ngrams as-is
my $help             = 0;              # --help
my $version          = 0;              # --version
my $orderbyfrequency = 0;              # --orderbyfrequency

# help([$status])
#
# Print the usage message and terminate the program.
#
#   $status - process exit status; defaults to 1 (usage error) so that a bare
#             help() call keeps signalling failure.
#
# The message goes to STDOUT when $status is 0 (the user asked for help) and
# to STDERR otherwise, so that an error never pollutes the n-gram table that
# a caller may be capturing from STDOUT.
sub help {
    my ($status) = @_;
    $status = 1 unless defined $status;

    my $out = $status ? \*STDERR : \*STDOUT;
    print {$out} <<"EOM";
Usage: $0 [--version] [--help] [--n=3] [--type=character]
       [--orderbyfrequency] [input files]

Compute n-gram frequencies of the input files (or of the standard input if
no files are given) and print the tables to the standard output.

Options:
  --n=NUMBER          N-gram size, produces 3-grams by default.
  --type=TYPE         Type of n-grams produced: character (default), byte
                      or word.  See the Text::Ngrams module.
  --orderbyfrequency  Order the n-grams by frequency in descending order
                      instead of lexicographically.
  --help              Print this help.
  --version           Print the version.
EOM
    exit $status;
}

# version()
#
# Print the script version and terminate successfully: printing the version
# on request is not an error.
sub version {
    print $VERSION, "\n";
    exit 0;
}

# An unknown or malformed option makes GetOptions return false; show the
# usage instead of silently carrying on with defaults.
GetOptions(
    'n=i'              => \$n,
    'type=s'           => \$type,
    'help'             => \$help,
    'version'          => \$version,
    'orderbyfrequency' => \$orderbyfrequency,
) or help(1);

help(0)   if $help;
version() if $version;

# The n-gram size must be a positive whole number.
help(1) if $n < 1 || int($n) != $n;

my $ng = Text::Ngrams->new( windowsize => $n, type => $type );

# Read the named files, or fall back to standard input when none are given.
if (@ARGV) {
    $ng->process_files(@ARGV);
}
else {
    $ng->process_files( \*STDIN );
}

# Lexicographic order is the module default; only pass an ordering when the
# user asked for frequency order.
my @to_string_args = $orderbyfrequency ? ( orderby => 'frequency' ) : ();
print $ng->to_string(@to_string_args);

exit 0;

__END__

=head1 NAME

ngrams - Compute the ngram frequencies and produce tables to the stdout.

=head1 SYNOPSIS

  ngrams [--version] [--help] [--n=3] [--type=character]
         [--orderbyfrequency] [input files]

=head1 DESCRIPTION

This script produces n-grams tables of the input files to the standard
output.

Options:

=over 4

=item --version

Prints version.

=item --help

Prints help.

=item --n=NUMBER

N-gram size, produces 3-grams by default.

=item --type=character|byte|word

Type of n-grams produced. See Text::Ngrams module.

=item --orderbyfrequency

By default, the n-grams are ordered lexicographically.  If this option
is specified, then they are ordered by frequency in descending order.

=back

=head1 PREREQUISITES

Text::Ngrams,
Getopt::Long

=head1 SCRIPT CATEGORIES

Text::Statistics

=head1 SEE ALSO

Text::Ngrams module.

=head1 COPYRIGHT

Copyright 2003 Vlado Keselj

This module is provided "as is" without expressed or implied warranty.
This is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

The latest version can be obtained from the author.

=for comment
NOTE(review): this copy of the file had lost the targets of two FE<lt>...E<gt>
links, one after the author's name and one at the end of the "latest version"
sentence.  Restore the original URLs from the upstream distribution.

=cut