~mcepl/num-utils

c1ba275861dba978fde6784c4538694297a4b940 — Suso Banderas 20 years ago ba491df 0.4
Fourth Beta Release v0.4

- Added -R option to normalize
- updated utils to include man page reference to normalize
- updated range program to include man page reference to seq(1)
- Added feedback directory and files to CVS.
- Added normalize program and documentation.
- Added median function and -M option
- Added -l option for using the lower median number on even sets.
- Added find_mode subroutine to average program and respective
  documentation.
17 files changed, 322 insertions(+), 21 deletions(-)

M CHANGELOG
M GOALS
M MANIFEST
M Makefile
M VERSION
M average
M bound
M interval
A normalize
M num-utils.spec.in
M numgrep
M numprocess
M numsum
M random
M range
M round
M template
M CHANGELOG => CHANGELOG +25 -0
@@ 1,4 1,29 @@

 Sept. 23rd, 2003 22:23 GMT
----------------------------
 Fourth Beta Release v0.4

 - Added -R option to normalize
 - updated utils to include man page reference to normalize
 - updated range program to include man page reference to seq(1)
 - Added feedback directory and files to CVS.

 Sept. 22nd, 2003 22:24 GMT
----------------------------

 - Added normalize program and documentation.

 Sept. 21st, 2003 20:51 GMT
---------------------------

 - Added median function and -M option
 - Added -l option for using the lower median number on even sets.

 Sept. 19th, 2003 15:50 GMT
----------------------------

 - Added find_mode subroutine to average program and respective documentation.

 June  9th, 2003 14:48 GMT
---------------------------
 Third Beta Release v0.3

M GOALS => GOALS +10 -0
@@ 327,6 327,7 @@ mode values.

              1 2 9 12 88 100 1000
                    ^^
   -l   -- Use the lower number of the median on even counted sets.

   -a   -- average all numbers in the file, not just the first ones found on
           each line.   


@@ 357,6 358,15 @@ mode values.

 Usage options  (in addition to the standard options)


-- normalize --

  Usage options:

  -R <range>  --  This is for specifying a range to normalize to instead of 0..1



-- round --

   This utility will round each number encountered up or down depending on its

M MANIFEST => MANIFEST +1 -0
@@ 19,3 19,4 @@ bound
numprocess
tests
interval
normalize

M Makefile => Makefile +2 -2
@@ 1,4 1,4 @@
# $Id: Makefile,v 1.11 2003/06/03 23:25:58 suso Exp $
# $Id: Makefile,v 1.12 2003/09/23 22:26:12 suso Exp $
# num-utils         A set of programs for doing operations on numbers
# Copyright (C) 2002-2003 Suso Banderas



@@ 22,7 22,7 @@ VERSION	= $(shell cat VERSION)
PROJECT	= num-utils
DIST	= $(PROJECT)-$(VERSION)
FILES	= $(shell cat MANIFEST)
UTILS	= average bound interval numgrep numprocess numsum random range round
UTILS	= average bound interval normalize numgrep numprocess numsum random range round
DOCS	= CHANGELOG COPYING LICENSE MANIFEST template README GOALS WARNING
TESTS	= file fractionalnums numbers numbers2 README zeros
# rpm --showrc is gettin to be hard to parse anymore.

M VERSION => VERSION +1 -1
@@ 1,1 1,1 @@
0.3
0.4

M average => average +70 -8
@@ 28,7 28,7 @@ use Getopt::Std;
use strict;
use vars qw/ %opts $verbose /;

getopts('dhiIqV', \%opts);
getopts('dhiIlmMqV', \%opts);

if ($opts{'h'}) {
    &help();


@@ 69,8 69,16 @@ if (@ARGV) {
    process_filehandle(\*STDIN, \@number_array);
}

my $average = calculate_mean(\@number_array);
my $average;
if ($opts{'m'}) {
    $average = find_mode(\@number_array);
} elsif ($opts{'M'}) {
    $average = find_median(\@number_array);
} else {   
    $average = calculate_mean(\@number_array);
}

# post processing.
if ($opts{'i'}) {
    $average = int($average);
} elsif ($opts{'I'}) {


@@ 102,6 110,11 @@ Options:
        -i      Only return the integer portion of the final sum.
        -I      Only return the decimal portion of the final sum

        -m      Find the mode (most occuring) of the list of numbers.
        -M      Find the median (middle number) of the list of numbers.
        -l      When finding the median and the count of numbers in the set is even,
                use the lower middle number instead of the upper middle number.

        -d      Debug. For developers only.
        -h      Help: You're looking at it.
        -V      Increase verbosity.


@@ 136,6 149,49 @@ sub calculate_mean {
    return $average;
}

# Return the mode of a set of numbers: the value that occurs most often.
#
# Takes a reference to an array of numbers.  When several values share the
# highest count, the one that reached that count first wins, because a later
# value must strictly exceed the current count to replace it.  An empty set
# yields undef.
sub find_mode {
    my ($values_ref) = @_;
    my %seen;
    my ($mode, $mode_count);
    for my $value (@$values_ref) {
        my $occurrences = ++$seen{$value};
        unless (defined $mode) {
            # The very first value is trivially the current mode.
            ($mode, $mode_count) = ($value, 1);
            next;
        }
        next unless $occurrences > $mode_count;
        # A count can only overtake the maximum by one, so stepping the
        # maximum keeps it equal to the new leader's count.
        $mode = $value;
        $mode_count += 1;
        print STDERR "Highest number is now: $mode with a count of $mode_count.\n" if ($verbose >= 2);
    }
    return $mode;
}

# Find the median of a set of numbers.
#
# Takes a reference to an array of numbers and returns the middle element of
# the numerically sorted set; the caller's array is left untouched because a
# sorted copy is used.  When the set has an even count there are two middle
# numbers: the upper one is returned by default, the lower one when the -l
# option ($opts{'l'}) is set.  An empty set yields undef.
#
# Diagnostics (verbosity 2 and up) go to STDERR so they never mix with the
# result the program prints on STDOUT.
sub find_median {
    my $number_array_ref = shift;
    my @number_array = sort {$a <=> $b} @$number_array_ref;
    my $array_count = scalar(@number_array);
    print STDERR "The array count is: $array_count\n" if ($verbose >= 2);

    # Zero-based position of the middle element.  For an odd count this is
    # the exact middle; for an even count it is the upper of the two middle
    # elements.
    my $median_index = int($array_count / 2);
    if (($array_count % 2) == 0 && $opts{'l'}) {
        $median_index--;    # -l: step down to the lower middle element.
    }

    # Report the index that is really used, not a separately derived value.
    print STDERR "The median index is: $median_index\n" if ($verbose >= 2);
    my $median = $number_array[$median_index];
    return $median;
}

# Lay down some of that perl pod action.
=pod



@@ 145,22 201,28 @@ average - Find the average of a set of numbers.

=head1 SYNOPSIS

B<average> [-dhiIV] <FILE>
B<average> [-dhiIlmMV] <FILE>

| B<average> [-dhiIV]    (Input on STDIN from pipeline.)
| B<average> [-dhiIlmMV]    (Input on STDIN from pipeline.)

B<average> [-dhiIV]      (Input on STDIN.  Use Ctrl-D to stop.)
B<average> [-dhiIlmMV]      (Input on STDIN.  Use Ctrl-D to stop.)

=head1 DESCRIPTION

B<average> is a program that is part of the numeric utilities package.  B<average>
will determine the average from all numbers on input.
B<average> is a program that is part of the numeric utilities package.  By default
B<average> will determine the average from all numbers on input.  Other
options allow you to find the mode and median.

=head1 OPTIONS

    -i  Only return the integer portion of the final sum.
    -I  Only return the decimal portion of the final sum

    -m  Find the mode (most occurring) of the list of numbers.
    -M  Find the median (middle number) of the list of numbers.
    -l  When finding the median and the count of numbers in the set is even,
        use the lower middle number instead of the upper middle number.

    -h  Help: You're looking at it.
    -V  Increase verbosity.
    -d  Debug mode.  For developers


@@ 168,7 230,7 @@ will determine the average from all numbers on input.

=head1 SEE ALSO

bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)
bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT


M bound => bound +1 -1
@@ 181,7 181,7 @@ all the numbers on the lines.

=head1 SEE ALSO

average(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)
average(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT


M interval => interval +1 -1
@@ 155,7 155,7 @@ in box office ticket sales for movies on imdb.com. ;-)

=head1 SEE ALSO

average(1), bound(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)
average(1), bound(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT


A normalize => normalize +197 -0
@@ 0,0 1,197 @@
#!/usr/bin/perl -w

# normalize:  Normalize a set of numbers. By default between 0 and 1.
#   
# Copyright (C) 2002-2003 Suso Banderas

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
# You may contact the author at <suso@suso.org>.

#######################
# VARIABLES AND SETUP #
#######################

use Getopt::Std;
use strict;
use vars qw/ %opts $verbose /;

my ($file, @number_array, @output_array, $number);

# Parse the command line.  'q' has to be part of the option spec: without it
# getopts rejects -q as an unknown option and the quiet branch below can
# never be taken.  -R takes an argument (the target range).
getopts('hdqR:V', \%opts);

if ($opts{'h'}) {
    help();
    exit(0);
}

# Verbosity levels: 3 = debug, 2 = verbose, 1 = normal (default), 0 = quiet.
# -d wins over -V, which wins over -q.
if ($opts{'d'}) {
    $verbose = 3;
    print STDERR "Debug mode\n";
} elsif ($opts{'V'}) {
    $verbose = 2;
    print STDERR "Verbose mode\n";
} elsif ($opts{'q'}) {
    $verbose = 0;  # Nothing except the final answer
} else {
    $verbose = 1;  # Normal warnings and such.
}


################
# MAIN PROGRAM #
################

# Collect the numbers: from every file named on the command line, or from
# STDIN when no file arguments were given.
if (@ARGV) {
    foreach my $file (@ARGV) {
        print STDERR "Reading from file $file.\n" if ($verbose >= 2);
        # Three-argument open with an explicit read mode: a file name that
        # contains characters such as '|' or '>' is opened as a plain file
        # and never interpreted as a pipe or an output redirection.
        if (open(my $argfile, '<', $file)) {
            process_filehandle($argfile, \@number_array);
            close($argfile);
        } elsif ($verbose) {
            # Unreadable files are skipped; the warning is suppressed only
            # when verbosity is 0.
            warn "Couldn't open file $file for reading: $!\n";
        }
    }
# For STDIN
} else {
    print STDERR "Reading from STDIN.\n" if ($verbose >= 2);
    process_filehandle(\*STDIN, \@number_array);
}

@output_array = normalize(\@number_array);

# One normalized number per output line, in input order.
foreach my $normalized (@output_array) {
    print "$normalized\n";
}

exit(0);

###############
# SUBROUTINES #
###############

# Print the usage summary for normalize on STDOUT.
sub help {
    # Nothing in the text is interpolated, so it is kept as a literal block.
    my $usage_text = <<'EOF';
----------------------------------------------
normalize : Normalize a set of numbers.
----------------------------------------------
Usage:

    normalize [options] <file>
    | normalize [options]
    normalize [options]

Options:
        -d      Debug. For developers only.
        -h      Help: You're looking at it.
        -V      Increase verbosity.

        -R <range>   This allows you to specify a different
                   normalized range instead of from 0 to 1.
                   For example:  -R 0..5
EOF
    print $usage_text;
}

# Read every line from a filehandle and collect the number each line starts
# with.
#
# Arguments:  the filehandle to read, and a reference to the array that
#             receives the numbers (appended in input order).
# A number is recognised only at the start of a line, after optional
# whitespace: an optional minus sign followed by digits with an optional
# decimal point.  Lines that do not start with a number are skipped.
# Always returns 1.
sub process_filehandle {
    my ($filehandle, $number_array_ref) = @_;

    while (<$filehandle>) {
        next unless m/^\s*(\-?[0-9]*\.?[0-9]+)/;
        my $found = $1;
        print STDERR "found number: $found\n" if ($verbose >= 3);
        push(@$number_array_ref, $found);
    }
    return 1;
}

# Normalize a set of numbers: express each one as its share of the set's
# total, scaled into a target range.
#
# Arguments:  a reference to an array of numbers.
# Returns:    a list with one value per input number, in input order, each
#             computed as (number / sum) * (top - bottom) + bottom.
#             An empty input gives an empty list.
# Options:    -R <bottom>..<top> ($opts{'R'}) selects the target range;
#             without it the range is 0..1.
# Dies:       when -R is not of the form <number>..<number>, or when the
#             numbers sum to zero (a share of a zero total is undefined).
sub normalize {
    my $number_array_ref = shift;
    my ($bottom_range, $top_range) = (0, 1);

    if (defined $opts{'R'}) {
        require Scalar::Util;
        ($bottom_range, $top_range) = split(/\.\./, $opts{'R'}, 2);
        # Reject a malformed range up front instead of silently computing
        # with undef or non-numeric limits.
        foreach my $limit ($bottom_range, $top_range) {
            next if defined($limit) && Scalar::Util::looks_like_number($limit);
            die "normalize: invalid range '$opts{'R'}', expected something like 0..5\n";
        }
        # Diagnostics go to STDERR so they never mix with the numbers
        # printed on STDOUT.
        print STDERR "R: $opts{'R'}\nbottom: $bottom_range\ntop: $top_range\n" if ($verbose >= 2);
    }

    return () unless @$number_array_ref;

    my $sum = 0;
    foreach my $value (@$number_array_ref) {
        $sum += $value;
    }
    if ($sum == 0) {
        die "normalize: cannot normalize, the input numbers sum to zero\n";
    }

    my $span = $top_range - $bottom_range;
    my @normalized_array;
    foreach my $value (@$number_array_ref) {
        push(@normalized_array, ($value / $sum) * $span + $bottom_range);
    }
    return @normalized_array;
}

# Lay down some of that perl pod action.
=pod

=head1 NAME

normalize - Normalize a set of numbers. By default between 0 and 1.

=head1 SYNOPSIS

B<normalize> [-dhRV] <FILE>

| B<normalize> [-dhRV]    (Input on STDIN from pipeline.)

B<normalize> [-dhRV]      (Input on STDIN.  Use Ctrl-D to stop.)

=head1 DESCRIPTION

B<normalize> is a program that is part of the numeric utilities package.  B<normalize>
will take a set of numbers on input and return that set as a normalized set of numbers
between 0 and 1 by default.  Or you can use the -R option to specify a different normalized
range.

=head1 OPTIONS

    -h  Help: You're looking at it.
    -V  Increase verbosity.
    -d  Debug mode.  For developers

    -R <range>   This allows you to specify a different normalized range instead of from 0 to 1.
               For example -R 0..5


=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT

normalize is part of the num-utils package, which is copyrighted by
Suso Banderas and released under the GPL license.  Please read
the COPYING and LICENSE files that came with the num-utils package.

  Developers can read the GOALS file and contact me about providing
submissions or help for the project.

=head1 MORE INFO

More info on normalize can be found at:

=over 1

=item B<http://suso.suso.org/programs/num-utils/>

=back

=cut


M num-utils.spec.in => num-utils.spec.in +7 -1
@@ 1,4 1,4 @@
# $Id: num-utils.spec.in,v 1.10 2003/06/09 14:53:38 suso Exp $
# $Id: num-utils.spec.in,v 1.11 2003/09/23 22:26:12 suso Exp $

Summary: num-utils are a set of programs for dealing with numbers.
Name: num-utils


@@ 31,6 31,7 @@ make ROOT="$RPM_BUILD_ROOT" rpminstall
%attr(0755 root root) /usr/bin/average
%attr(0755 root root) /usr/bin/bound
%attr(0755 root root) /usr/bin/interval
%attr(0755 root root) /usr/bin/normalize
%attr(0755 root root) /usr/bin/numgrep
%attr(0755 root root) /usr/bin/numprocess
%attr(0755 root root) /usr/bin/numsum


@@ 43,6 44,7 @@ make ROOT="$RPM_BUILD_ROOT" rpminstall
%{_mandir}/man1/average.1.gz
%{_mandir}/man1/bound.1.gz
%{_mandir}/man1/interval.1.gz
%{_mandir}/man1/normalize.1.gz
%{_mandir}/man1/numgrep.1.gz
%{_mandir}/man1/numprocess.1.gz
%{_mandir}/man1/numsum.1.gz


@@ 52,6 54,10 @@ make ROOT="$RPM_BUILD_ROOT" rpminstall


%changelog
* Tue Sep 23 2003 Suso Banderas <suso@suso.org>
- 0.4 release
- added file entries for normalize program.

* Mon Jun  9 2003 Suso Banderas <suso@suso.org>
- 0.3 beta release


M numgrep => numgrep +1 -1
@@ 359,7 359,7 @@ future.

=head1 SEE ALSO

average(1), bound(1), interval(1), numprocess(1), numsum(1), random(1), range(1), round(1)
average(1), bound(1), interval(1), normalize(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT


M numprocess => numprocess +1 -1
@@ 237,7 237,7 @@ of the original number.

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numsum(1), random(1), range(1), round(1)
average(1), bound(1), interval(1), normalize(1), numgrep(1), numsum(1), random(1), range(1), round(1)

=head1 BUGS


M numsum => numsum +1 -1
@@ 196,7 196,7 @@ Add up the total byte count in a http log file.

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), random(1), range(1), round(1)
average(1), bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), random(1), range(1), round(1)

=head1 COPYRIGHT


M random => random +1 -1
@@ 221,7 221,7 @@ for large ranges.

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), range(1), round(1)
average(1), bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), range(1), round(1)

=head1 COPYRIGHT


M range => range +1 -1
@@ 249,7 249,7 @@ downwards.

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), round(1)
seq(1), average(1), bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), round(1)

=head1 COPYRIGHT


M round => round +1 -1
@@ 200,7 200,7 @@ some calculations to be in error, depending on how you are using the data.

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1)
average(1), bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1)

=head1 COPYRIGHT


M template => template +1 -1
@@ 136,7 136,7 @@ will .......

=head1 SEE ALSO

average(1), bound(1), interval(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)
average(1), bound(1), interval(1), normalize(1), numgrep(1), numprocess(1), numsum(1), random(1), range(1), round(1)

=head1 COPYRIGHT