support SGML OFX responses with no line breaks

Some financial institutions (*cough* Wells Fargo *cough*) export OFX files as a single line, which is technically valid according to the v1 spec. To parse them correctly, `readSGMLHeaders` now uses a regular expression that treats the whitespace and line breaks after each header's colon and after each header value as optional.

A new sample response (wellsfargo.qfx) has been added to document this behaviour.
This commit is contained in:
Aaron Ross 2021-01-03 00:20:49 -08:00 committed by Aaron Lindsay
parent 56ca46714b
commit 0d93a42626
2 changed files with 46 additions and 47 deletions

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"reflect" "reflect"
"regexp"
"strings" "strings"
"github.com/aclindsa/xml" "github.com/aclindsa/xml"
@ -35,78 +36,75 @@ type Response struct {
} }
func (or *Response) readSGMLHeaders(r *bufio.Reader) error { func (or *Response) readSGMLHeaders(r *bufio.Reader) error {
var seenHeader, seenVersion bool = false, false b, err := r.ReadSlice('<')
for { if err != nil {
// Some financial institutions do not properly leave an empty line after the last header. return err
// Avoid attempting to read another header in that case. }
next, err := r.Peek(1)
if err != nil { s := string(b)
return err err = r.UnreadByte()
} if err != nil {
if next[0] == '<' { return err
break }
// According to the latest OFX SGML spec (1.6), headers should be CRLF-separated
// and written as KEY:VALUE. However, some banks include a whitespace after the
// colon (KEY: VALUE), while others include no line breaks at all. The spec doesn't
// require a line break after the OFX headers, but it is allowed, and will be
// optionally captured & discarded by the trailing `\s*`. Valid SGML headers must
// always be present in exactly this order, so a regular expression is acceptable.
headerExp := regexp.MustCompile(
`OFXHEADER:\s*(?P<OFXHEADER>\d+)\s*` +
`DATA:\s*(?P<DATA>[A-Z]+)\s*` +
`VERSION:\s*(?P<VERSION>\d+)\s*` +
`SECURITY:\s*(?P<SECURITY>[\w]+)\s*` +
`ENCODING:\s*(?P<ENCODING>[A-Z0-9-]+)\s*` +
`CHARSET:\s*(?P<CHARSET>[\w-]+)\s*` +
`COMPRESSION:\s*(?P<COMPRESSION>[A-Z]+)\s*` +
`OLDFILEUID:\s*(?P<OLDFILEUID>[\w-]+)\s*` +
`NEWFILEUID:\s*(?P<NEWFILEUID>[\w-]+)\s*`)
matches := headerExp.FindStringSubmatch(s)
if len(matches) == 0 {
return errors.New("OFX headers malformed")
}
for i, name := range headerExp.SubexpNames() {
if i == 0 {
continue
} }
line, err := r.ReadString('\n') headerValue := matches[i]
if err != nil { switch name {
return err
}
// r.ReadString leaves the '\n' on the end...
line = strings.TrimSpace(line)
if len(line) == 0 {
if seenHeader {
break
} else {
continue
}
}
header := strings.SplitN(line, ":", 2)
if header == nil || len(header) != 2 {
return errors.New("OFX headers malformed")
}
// Some OFX servers put a space after the colon
headervalue := strings.TrimSpace(header[1])
switch header[0] {
case "OFXHEADER": case "OFXHEADER":
if headervalue != "100" { if headerValue != "100" {
return errors.New("OFXHEADER is not 100") return errors.New("OFXHEADER is not 100")
} }
seenHeader = true
case "DATA": case "DATA":
if headervalue != "OFXSGML" { if headerValue != "OFXSGML" {
return errors.New("OFX DATA header does not contain OFXSGML") return errors.New("OFX DATA header does not contain OFXSGML")
} }
case "VERSION": case "VERSION":
err := or.Version.FromString(headervalue) err := or.Version.FromString(headerValue)
if err != nil { if err != nil {
return err return err
} }
seenVersion = true
if or.Version > OfxVersion160 { if or.Version > OfxVersion160 {
return errors.New("OFX VERSION > 160 in SGML header") return errors.New("OFX VERSION > 160 in SGML header")
} }
case "SECURITY": case "SECURITY":
if headervalue != "NONE" { if headerValue != "NONE" {
return errors.New("OFX SECURITY header not NONE") return errors.New("OFX SECURITY header not NONE")
} }
case "COMPRESSION": case "COMPRESSION":
if headervalue != "NONE" { if headerValue != "NONE" {
return errors.New("OFX COMPRESSION header not NONE") return errors.New("OFX COMPRESSION header not NONE")
} }
case "ENCODING", "CHARSET", "OLDFILEUID", "NEWFILEUID": case "ENCODING", "CHARSET", "OLDFILEUID", "NEWFILEUID":
// TODO check/handle these headers? // TODO: check/handle these headers?
default:
return errors.New("Invalid OFX header: " + header[0])
} }
} }
if !seenVersion {
return errors.New("OFX VERSION header missing")
}
return nil return nil
} }

View File

@ -0,0 +1 @@
OFXHEADER:100DATA:OFXSGMLVERSION:102SECURITY:NONEENCODING:USASCIICHARSET:1252COMPRESSION:NONEOLDFILEUID:NONENEWFILEUID:NONE<OFX><SIGNONMSGSRSV1><SONRS><STATUS><CODE>0<SEVERITY>INFO<MESSAGE>SUCCESS</STATUS><DTSERVER>20210102211014.201[-8:PST]<LANGUAGE>ENG<FI><ORG>WF<FID>1000</FI><SESSCOOKIE>abc-123<INTU.BID>1000<INTU.USERID>jane_doe</SONRS></SIGNONMSGSRSV1><BANKMSGSRSV1><STMTTRNRS><TRNUID>0<STATUS><CODE>0<SEVERITY>INFO<MESSAGE>SUCCESS</STATUS><STMTRS><CURDEF>USD<BANKACCTFROM><BANKID>123456789<ACCTID>9876543210<ACCTTYPE>CHECKING</BANKACCTFROM><BANKTRANLIST><DTSTART>20201201120000.000[-8:PST]<DTEND>20201231120000.000[-8:PST]<STMTTRN><TRNTYPE>DIRECTDEBIT<DTPOSTED>20201201120000.000[-8:PST]<TRNAMT>-12.34<FITID>202012011<NAME>AE Visa Card AE EPAY<MEMO> XXXXX1234</STMTTRN></BANKTRANLIST><LEDGERBAL><BALAMT>123.45<DTASOF>20201231120000.000[-8:PST]</LEDGERBAL><AVAILBAL><BALAMT>123.45<DTASOF>20201231120000.000[-8:PST]</AVAILBAL></STMTRS></STMTTRNRS></BANKMSGSRSV1></OFX>