~rgrjr/Spreadsheet-Table-Extract

46ed3ff68cc4cf7e5a67f500980989b769675f3f — Bob Rogers a year ago 6fbaa31
Make find_tables and extract return lists

* lib/Spreadsheet/Table/Extract.pm:
   + (find_tables):  Return a list rather than an arrayref.
   + (extract):  Likewise.
* t/01-basic.t:
   + Update the tests.
2 files changed, 53 insertions(+), 53 deletions(-)

M lib/Spreadsheet/Table/Extract.pm
M t/01-basic.t
M lib/Spreadsheet/Table/Extract.pm => lib/Spreadsheet/Table/Extract.pm +15 -15
@@ 52,8 52,7 @@ sub find_tables {
    my $class = $inits{class} || $invocant_class;

    my $maxrow = $sheet->{maxrow};
    my $tables = [ ];
    return $tables
    return
	unless $maxrow;

    # Find $min_slots.


@@ 76,6 75,7 @@ sub find_tables {
	@$min_slots <= grep { $column_from_slot->{$_}; } @$min_slots;
    };

    my @tables;
    my $make_table_descriptor = sub {
	# Make the table descriptor starting on the given row.
	my ($row) = @_;


@@ 94,9 94,9 @@ sub find_tables {
				first_col => $mincol, last_col => $maxcol,
				column_from_slot => $column_from_slot,
				@inits);
	push(@$tables, $table);
	push(@tables, $table);
	# Default the name if given a base_name.
	$table->name($inits{base_name} . @$tables)
	$table->name($inits{base_name} . scalar(@tables))
	    if ! $table->name && $inits{base_name};
	$column_from_slot = { };
    };


@@ 133,13 133,13 @@ sub find_tables {
	    if $have_enough_p->();
    }

    return $tables
    return @tables
}

sub extract {
    my ($self, $sheet, $class) = @_;

    my $result = [ ];
    my @result;
    my $column_from_slot = $self->column_from_slot;
    my $validator = $self->validate_object;
    for my $row ($self->first_row .. $self->last_row) {


@@ 158,17 158,17 @@ sub extract {
	    if (! $validator) {
		# The default validator is to insist on at least one non-empty
		# column.
		push(@$result, $class->new(@options))
		push(@result, $class->new(@options))
		    unless $n_null == (@options / 2);
	    }
	    else {
		my $object = $class->new(@options);
		push(@$result, $object)
		push(@result, $object)
		    if ! $validator || $validator->($object);
	    }
	};
    }
    return $result;
    return @result;
}

sub explain_overlap {


@@ 292,13 292,13 @@ Version 0.1

    my $book = ReadData('some-spreadsheet.xlsx');
    my $sheet1 = $book->[1];
    my $tables = Spreadsheet::Table::Extract->find_tables
    my @tables = Spreadsheet::Table::Extract->find_tables
        ($sheet1, $slot_from_heading,
         validate_object => Checkbook::Entry->can('validate'));
    # assume we have only one table; if there are multiple heading
    # rows, there could be several.
    my $entries = $checkbook_table->extract($tables->[0]);
    say "have ", scalar($entries), " checkbook transactions.";
    my @entries = $tables[0]->extract($sheet1, 'Checkbook::Entry');
    say "have ", scalar(@entries), " checkbook transactions.";

=head1 DESCRIPTION



@@ 328,7 328,7 @@ potential overlap.

=item 3.

Call L</extract> on each table to produce an arrayref of objects of the
Call L</extract> on each table to produce a list of objects of the
desired application class.

=back


@@ 353,7 353,7 @@ the end user.
=head3 extract

Given a sheet and a class name, extract an object out of each row
described by the table we represent, and return an arrayref of all
described by the table we represent, and return a list of all
such objects.  To construct each object, our L</column_from_slot>
hashref is consulted, and for each slot we pluck the value out of the
corresponding row and column to construct a keyword/value list to pass


@@ 371,7 371,7 @@ the row is skipped (because we don't get an object back from C<new>).
Given a sheet, a hashref that describes how to identify a heading row,
plus additional optional keyword/value pairs which may be used
as extra object initializers, this class method searches for heading
rows in the sheet, returning an arrayref of instances, one for each
rows in the sheet, returning a list of instances, one for each
heading found.

The two positional arguments are as follows:

M t/01-basic.t => t/01-basic.t +38 -38
@@ 25,16 25,16 @@ my $file = 't/data/bank-statement.gnumeric';
my $book = Spreadsheet::Read->new($file);
ok($book, "have '$file' test content")
   or die "could not read '$file' as a spreadsheet";
my $tables = Spreadsheet::Table::Extract->find_tables
my @tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $slot_from_name,
     minimum_slots => [ qw(description payment fee random) ]);
is_deeply($tables, [ ], "failing test matches");
$tables = Spreadsheet::Table::Extract->find_tables
ok(! @tables, "failing test returns nothing");
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $slot_from_name,
     minimum_slots => [ qw(description payment fee deposit) ]);
use vars qw($test_1_full_expected);
do './t/data/basic-test-output.pm';
is_deeply($tables, $test_1_full_expected, "full table matches");
is_deeply(\@tables, $test_1_full_expected, "full table matches");

## Test object instantiation for the checkbook spreadsheet.
my $simple_slot_from_name = {


@@ 45,19 45,19 @@ my $simple_slot_from_name = {
	'deposit' => 'deposit'
};
my $simple_slots = [ values(%$simple_slot_from_name) ];
$tables = Spreadsheet::Table::Extract->find_tables
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $simple_slot_from_name, minimum_slots => $simple_slots);
use vars qw($test_1_simple_expected $test_1_simple_entries);
is_deeply($tables, $test_1_simple_expected, "simple table matches");
my $checkbook_table = $tables->[0];
is_deeply(\@tables, $test_1_simple_expected, "simple table matches");
my $checkbook_table = $tables[0];
# Normally, this would be supplied as an initializer to the find_tables method,
# but that would confuse is_deeply.
$checkbook_table->validate_object
    (Spreadsheet::Table::Test::CheckbookEntry->can('validate'));
my $entries = $checkbook_table->extract
my @entries = $checkbook_table->extract
    ($book->[1], 'Spreadsheet::Table::Test::CheckbookEntry');
ok(19 == @$entries, "have 19 extracted entries");
is_deeply($entries, $test_1_simple_entries, "extracted entries match");
ok(19 == @entries, "have 19 extracted entries");
is_deeply(\@entries, $test_1_simple_entries, "extracted entries match");

## Test multiple tables on the same sheet.
my $multiple = 't/data/multiple.gnumeric';


@@ 66,48 66,48 @@ $book = Spreadsheet::Read->new($multiple);
ok($book, "have '$multiple' test content")
   or die "could not read '$multiple' as a spreadsheet";
# Look for the checkbook table.
$tables = Spreadsheet::Table::Extract->find_tables
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $simple_slot_from_name, name => 'checkbook');
ok(@$tables == 1, "have one table");
my $check_table = $tables->[0];
ok(@tables == 1, "have one table");
my $check_table = $tables[0];
is_deeply($check_table, $test_1_multiple_expected->[0],
	  "first multiple table matches");
my $all_tables = $tables;
my $all_tables = [ @tables ];
# We are now extracting four more rows than we should.
$check_table->validate_object
    (Spreadsheet::Table::Test::CheckbookEntry->can('validate'));
$entries = $check_table->extract
@entries = $check_table->extract
    ($book->[1], 'Spreadsheet::Table::Test::CheckbookEntry');
ok(23 == @$entries, "have too many extracted entries");
ok(23 == @entries, "have too many extracted entries");
# Look for the category table.
my $category_slot_from_name
    = { 'short name' => 'tax_utils',
        'pretty name' => 'pretty_name',
	'tax?' => 'tax_p' };
$tables = Spreadsheet::Table::Extract->find_tables
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $category_slot_from_name, name => 'category');
ok(@$tables == 1, "have one table");
my $cat_table = $tables->[0];
ok(@tables == 1, "have one table");
my $cat_table = $tables[0];
is_deeply($cat_table, $test_1_multiple_expected->[1],
	  "category (second multiple) table matches");
push(@$all_tables, @$tables);
push(@$all_tables, @tables);
# Look for the description table.
my $description_slot_from_name
    = { 'description' => 'description',
        'category' => 'category',
	'start date' => 'start_date',
	'end date' => 'end_date' };
$tables = Spreadsheet::Table::Extract->find_tables
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $description_slot_from_name, name => 'description');
ok(@$tables == 1, "have one table");
push(@$all_tables, @$tables);
is_deeply($tables->[0], $test_1_multiple_expected->[2],
ok(@tables == 1, "have one table");
push(@$all_tables, @tables);
is_deeply($tables[0], $test_1_multiple_expected->[2],
	  "third multiple table matches");
# Check that the $min_slots default works.
$tables = Spreadsheet::Table::Extract->find_tables
@tables = Spreadsheet::Table::Extract->find_tables
    ($book->[1], $description_slot_from_name, name => 'description');
ok(@$tables == 1, "have one table");
is_deeply($tables->[0], $test_1_multiple_expected->[2],
ok(@tables == 1, "have one table");
is_deeply($tables[0], $test_1_multiple_expected->[2],
	  "third multiple table still matches");

# Test disjointness.


@@ 125,40 125,40 @@ $check_table->make_disjoint($all_tables);
is_deeply([ map { $_->last_row; } @$all_tables ], [ 29, 40, 40],
	  "disjoint tables have correct final rows");
# Once again, we get the right number of entries.
$entries = $all_tables->[0]->extract
@entries = $all_tables->[0]->extract
    ($book->[1], 'Spreadsheet::Table::Test::CheckbookEntry');
ok(19 == @$entries, "have 19 extracted entries again");
is_deeply($entries, $test_1_simple_entries, "extracted entries still match");
ok(19 == @entries, "have 19 extracted entries again");
is_deeply(\@entries, $test_1_simple_entries, "extracted entries still match");

## Test the validate_object default.
my $categories
my @categories
    = $cat_table->extract($book->[1], 'Spreadsheet::Table::Test::Category');
is(scalar(@$categories), 3, 'have three categories');
is(scalar(@categories), 3, 'have three categories');

## Test side-by-side table definitions.
my $side_by_side = 't/data/side-by-side.gnumeric';
my $book2 = Spreadsheet::Read->new($side_by_side);
ok($book2, "have '$side_by_side' test content")
   or die "could not read '$side_by_side' as a spreadsheet";
my $sbs_tables = Spreadsheet::Table::Extract->find_tables
my @sbs_tables = Spreadsheet::Table::Extract->find_tables
    ($book2->[1], { foo => 'foo', bar => 'bar', baz => 'baz' },
     base_name => 'foo');
my $columns = [ map { $_->column_from_slot; } @$sbs_tables ];
my $columns = [ map { $_->column_from_slot; } @sbs_tables ];
is_deeply($columns,
	  [ { 'bar' => 2, 'baz' => 3, 'foo' => 1 },
	    { 'bar' => 6, 'baz' => 5, 'foo' => 4 },
	    { 'bar' => 8, 'baz' => 7, 'foo' => 9 } ],
	  'three tables extracted with three columns each');
my $names = [ map { $_->name; } @$sbs_tables ];
my $names = [ map { $_->name; } @sbs_tables ];
is_deeply($names, [ qw(foo1 foo2 foo3) ],
	  'three tables extracted with generated names');

## Test changing the extracted table class.
$sbs_tables = Spreadsheet::Table::Extract->find_tables
@sbs_tables = Spreadsheet::Table::Extract->find_tables
    ($book2->[1], { foo => 'foo', bar => 'bar', baz => 'baz' },
     class => 'Spreadsheet::Table::Test::NewTable');
is(scalar(@$sbs_tables), 3, "still have three tables");
is(ref($sbs_tables->[0]), 'Spreadsheet::Table::Test::NewTable',
is(scalar(@sbs_tables), 3, "still have three tables");
is(ref($sbs_tables[0]), 'Spreadsheet::Table::Test::NewTable',
   'first new table isa Spreadsheet::Table::Test::NewTable');

### A test checkbook entry class to instantiate into.