~mendelmaleh/pfin

97616a805462044e0ab5a3f7e0de3b76a05ee406 — Mendel E a month ago 6e14e79
Add citi parser

This parses Citi transaction HTML pages, because they expose some info not included in the CSV download.
M go.mod => go.mod +2 -0
@@ 3,9 3,11 @@ module git.sr.ht/~mendelmaleh/pfin
go 1.17

require (
	github.com/andybalholm/cascadia v1.3.1
	github.com/jszwec/csvutil v1.6.0
	github.com/pelletier/go-toml/v2 v2.0.0-beta.6
	github.com/stretchr/testify v1.7.1-0.20210427113832-6241f9ab9942
	golang.org/x/net v0.0.0-20220927171203-f486391704dc
)

require (

M go.sum => go.sum +14 -0
@@ 1,3 1,5 @@
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=


@@ 10,6 12,18 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.1-0.20210427113832-6241f9ab9942 h1:t0lM6y/M5IiUZyvbBTcngso8SZEZICH7is9B6g/obVU=
github.com/stretchr/testify v1.7.1-0.20210427113832-6241f9ab9942/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220927171203-f486391704dc h1:FxpXZdoBqT8RjqTy6i1E8nXHhW21wK7ptQ/EPIGxzPQ=
golang.org/x/net v0.0.0-20220927171203-f486391704dc/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=

M parser/all/all.go => parser/all/all.go +1 -0
@@ 5,5 5,6 @@ import (
	_ "git.sr.ht/~mendelmaleh/pfin/parser/bofa"
	_ "git.sr.ht/~mendelmaleh/pfin/parser/capitalone"
	_ "git.sr.ht/~mendelmaleh/pfin/parser/chase"
	_ "git.sr.ht/~mendelmaleh/pfin/parser/citi"
	_ "git.sr.ht/~mendelmaleh/pfin/parser/personal"
)

A parser/citi/html.go => parser/citi/html.go +108 -0
@@ 0,0 1,108 @@
package citi

import (
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/andybalholm/cascadia"
	"golang.org/x/net/html"
)

// CSS selectors, compiled once at package initialisation, used to pick
// transaction data out of the parsed HTML document. cascadia.MustCompile
// panics if a selector string is invalid.
var (
	// SelectorTxRow matches elements carrying both the "transaction-row" and
	// "mobile" classes; Parser.Parse treats each match as one transaction.
	SelectorTxRow     = cascadia.MustCompile(".transaction-row.mobile")
	// SelectorTxDesc matches the description element inside a transaction row.
	SelectorTxDesc    = cascadia.MustCompile("td > div > div.body > div.top > div.description")
	// SelectorTxAmount matches the amount element inside a transaction row.
	SelectorTxAmount  = cascadia.MustCompile("td > div > div.body > div.top > div.amount")
	// SelectorTxDetails matches each name/value row of the extended
	// descriptions; TxFromHTML applies it to the node following the
	// transaction row.
	SelectorTxDetails = cascadia.MustCompile("td.transaction-details-cell > div.transaction-details-wrapper > div.extended-descriptions >  div.extended-description-row")
)

// TxFromHTML builds a Transaction from a single transaction row node.
//
// The description and amount are read from n itself (first match of
// SelectorTxDesc and SelectorTxAmount). The extended details (dates,
// cardmember, purchase method, spend category, rewards and type) are read
// from the node that immediately follows n, using SelectorTxDetails; when n
// has no next sibling those fields keep their zero values.
//
// An error is returned when n is nil, when the description or amount is
// missing or empty, when a value cannot be parsed, when a detail row does not
// have the expected name/value structure, or when a detail name is unknown.
// On error the returned Transaction holds whatever was parsed before the
// failure and should not be relied upon.
func TxFromHTML(n *html.Node) (tx Transaction, err error) {
	if n == nil {
		return tx, errors.New("nil transaction node")
	}

	desc := SelectorTxDesc.MatchFirst(n)
	if desc == nil {
		return tx, errors.New("no transaction description")
	}
	if desc.FirstChild == nil {
		return tx, errors.New("empty transaction description")
	}
	tx.Description = strings.TrimSpace(desc.FirstChild.Data)

	amount := SelectorTxAmount.MatchFirst(n)
	if amount == nil {
		return tx, errors.New("no transaction amount")
	}
	if amount.FirstChild == nil {
		return tx, errors.New("empty transaction amount")
	}
	if tx.Raw.Amount, err = parseAmount(amount.FirstChild.Data); err != nil {
		return tx, err
	}

	// The additional details live in the node right after the row; without
	// one there is nothing more to read.
	if n.NextSibling == nil {
		return tx, nil
	}

	for _, row := range SelectorTxDetails.MatchAll(n.NextSibling) {
		name, value, ok := detailPair(row)
		if !ok {
			return tx, errors.New("malformed transaction detail row")
		}

		switch name {
		case "Purchased On", "Posted On":
			var t time.Time
			if t, err = parseDate(value); err != nil {
				return tx, fmt.Errorf("couldn't parse %q date %q: %w", name, value, err)
			}

			if name == "Purchased On" {
				tx.Purchased = t
			} else {
				tx.Posted = t
			}
		case "Cardmember Name":
			tx.Cardmember = value
		case "Purchase Method":
			tx.Method = value
		case "Spend Category":
			tx.Raw.Category = value
		case "Rewards":
			fields := strings.Fields(value)
			if len(fields) == 0 {
				return tx, errors.New("empty rewards value")
			}
			if fields[0] == "N/A" {
				continue
			}

			// Drop thousands separators, as is done for the amount.
			var points int
			if points, err = strconv.Atoi(strings.ReplaceAll(fields[0], ",", "")); err != nil {
				return tx, fmt.Errorf("couldn't parse rewards %q: %w", value, err)
			}

			tx.Rewards = points
		case "Type":
			tx.Type = value
		default:
			return tx, fmt.Errorf("unknown name %q (value %q)", name, value)
		}
	}

	return tx, nil
}

// parseAmount converts amount text such as "$1,234.56" or "-$12.00" to a
// float, dropping thousands separators and the dollar sign.
func parseAmount(raw string) (float64, error) {
	s := strings.ReplaceAll(strings.TrimSpace(raw), ",", "")

	negative := strings.HasPrefix(s, "-")
	s = strings.TrimPrefix(s, "-")
	s = strings.TrimPrefix(s, "$")

	f, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, fmt.Errorf("couldn't parse %q to float: %w", s, err)
	}

	if negative {
		f *= -1
	}

	return f, nil
}

// parseDate parses a detail date, with or without a time-of-day part.
func parseDate(value string) (time.Time, error) {
	if len(strings.Fields(value)) > 3 {
		// TODO: fix timezone parsing. "ET" is matched as literal text, so the
		// resulting time is reported in UTC rather than Eastern time.
		return time.Parse("Jan 02, 2006 03:04 PM ET", value)
	}

	return time.Parse("Jan 02, 2006", value)
}

// detailPair extracts the name and value text of one extended-description
// row; ok is false when the row lacks the expected two children with content.
func detailPair(row *html.Node) (name, value string, ok bool) {
	label := row.FirstChild
	if label == nil || label.FirstChild == nil {
		return "", "", false
	}

	content := label.NextSibling
	if content == nil || content.FirstChild == nil {
		return "", "", false
	}

	return label.FirstChild.Data, content.FirstChild.Data, true
}

A parser/citi/interface.go => parser/citi/interface.go +44 -0
@@ 0,0 1,44 @@
package citi

import (
	"time"

	"git.sr.ht/~mendelmaleh/pfin/util"
)

// Date returns the purchase time when one is set, and otherwise falls back
// to the posted time.
func (tx Transaction) Date() time.Time {
	if !tx.Purchased.IsZero() {
		return tx.Purchased
	}

	return tx.Posted
}

// Amount returns the parsed transaction amount stored in Raw.
func (tx Transaction) Amount() float64 {
	return tx.Raw.Amount
}

// Name returns the transaction description stored in Raw.
func (tx Transaction) Name() string {
	return tx.Raw.Description
}

// Category returns the spend category stored in Raw.
func (tx Transaction) Category() string {
	return tx.Raw.Category
}

// Card returns the computed card identifier: the virtual account number when
// the description carried one, otherwise the cardmember name (see
// Parser.Parse).
func (tx Transaction) Card() string {
	return tx.Fields.Card
}

// User returns the user that Parser.Parse looked up for this transaction's
// card in the account's Cards mapping.
func (tx Transaction) User() string {
	return tx.Fields.User
}

// Account returns the name of the account this transaction was parsed for.
func (tx Transaction) Account() string {
	return tx.Fields.Account
}

// String formats the transaction by delegating to util.FormatTx with a
// single space as the second argument (presumably the field separator;
// confirm against util.FormatTx).
func (tx Transaction) String() string {
	return util.FormatTx(tx, " ")
}

A parser/citi/parser.go => parser/citi/parser.go +54 -0
@@ 0,0 1,54 @@
package citi

import (
	"bytes"
	"regexp"

	"git.sr.ht/~mendelmaleh/pfin"
	"golang.org/x/net/html"
)

// init registers this parser with pfin under the name "citi".
func init() {
	pfin.Register("citi", Parser{})
}

// virtualRegex matches a trailing " - Virtual Account Number NNNN" suffix and
// captures the four digits.
var virtualRegex = regexp.MustCompile(` - Virtual Account Number (\d{4})$`)

// Parser implements the pfin.Parser interface for Citi transaction HTML
// pages. It carries no state.
type Parser struct{}

// Filetype returns "html", presumably the file type pfin should hand to this
// parser (confirm against the pfin.Parser contract).
func (Parser) Filetype() string {
	return "html"
}

// Parse reads data as an HTML document and returns one transaction per
// element matched by SelectorTxRow, in document order.
//
// For each transaction the card is taken from a trailing
// " - Virtual Account Number NNNN" suffix of the description (which is then
// stripped from the description), or from the cardmember name when there is
// no such suffix. The user is looked up in acc.Cards by that card and the
// account is set to acc.Name. filename is not used.
//
// If a row fails to parse, the transactions collected so far are returned
// together with the error.
func (Parser) Parse(acc pfin.Account, filename string, data []byte) (txns []pfin.Transaction, err error) {
	// TODO: use io.Reader interface in parser.Parse
	root, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}

	rows := SelectorTxRow.MatchAll(root)
	for i := range rows {
		tx, rowErr := TxFromHTML(rows[i])
		if rowErr != nil {
			return txns, rowErr
		}

		// virtual card number or cardmember
		if m := virtualRegex.FindStringSubmatch(tx.Description); m != nil {
			// The pattern is anchored at the end of the text, so the whole
			// match is exactly the suffix to cut off.
			tx.Fields.Card = m[1]
			tx.Description = tx.Description[:len(tx.Description)-len(m[0])]
		} else {
			tx.Fields.Card = tx.Raw.Cardmember
		}

		tx.Fields.Account = acc.Name
		tx.Fields.User = acc.Cards[tx.Fields.Card]

		txns = append(txns, tx)
	}

	return txns, nil
}

A parser/citi/types.go => parser/citi/types.go +31 -0
@@ 0,0 1,31 @@
package citi

import "time"

// Transaction is a single Citi transaction: the values read from the page
// (Raw) plus the values derived from them and the account (Fields).
type Transaction struct {
	Fields // computed fields, namespaced so they don't conflict with the interface methods
	Raw    // raw fields read from the html transaction page
}

// Fields holds the values Parser.Parse computes for a transaction.
type Fields struct {
	// Card is the virtual account number taken from the description, or the
	// cardmember name when the description has none.
	Card    string
	// User is the entry of the account's Cards mapping for Card.
	User    string
	// Account is the name of the account being parsed.
	Account string
}

// Raw holds the values TxFromHTML reads directly from a transaction row and
// its extended details.
type Raw struct {
	// Description is the trimmed description text; Parser.Parse strips a
	// trailing virtual account number suffix from it.
	Description string
	// Amount is the parsed amount, negative when the page shows a leading '-'.
	Amount      float64

	// Purchased and Posted come from the "Purchased On" and "Posted On"
	// details; either may be the zero time when the detail is absent.
	Purchased time.Time
	Posted    time.Time

	// Cardmember, Method and Category come from the "Cardmember Name",
	// "Purchase Method" and "Spend Category" details.
	Cardmember string
	Method     string
	Category   string
	// Rewards is the leading number of the "Rewards" detail; it stays 0 when
	// the detail is "N/A" or absent.
	Rewards    int

	// Country    string

	// Type comes from the "Type" detail.
	Type string
}