support SGML OFX responses with no line breaks

Some financial institutions (*cough* Wells Fargo *cough*) export OFX files as a single line, which is technically valid according to the v1 spec. In order to parse them correctly, `readSGMLHeaders` now uses a regular expression that allows for all whitespace/line breaks to be optionally excluded.

A new sample response (wellsfargo.qfx) has been added to document this behaviour.
This commit is contained in:
Aaron Ross 2021-01-03 00:20:49 -08:00 committed by Aaron Lindsay
parent 56ca46714b
commit 0d93a42626
2 changed files with 46 additions and 47 deletions

View File

@ -7,6 +7,7 @@ import (
"fmt"
"io"
"reflect"
"regexp"
"strings"
"github.com/aclindsa/xml"
@ -35,78 +36,75 @@ type Response struct {
}
func (or *Response) readSGMLHeaders(r *bufio.Reader) error {
var seenHeader, seenVersion bool = false, false
for {
// Some financial institutions do not properly leave an empty line after the last header.
// Avoid attempting to read another header in that case.
next, err := r.Peek(1)
if err != nil {
return err
}
if next[0] == '<' {
break
b, err := r.ReadSlice('<')
if err != nil {
return err
}
s := string(b)
err = r.UnreadByte()
if err != nil {
return err
}
// According to the latest OFX SGML spec (1.6), headers should be CRLF-separated
// and written as KEY:VALUE. However, some banks include a whitespace after the
// colon (KEY: VALUE), while others include no line breaks at all. The spec doesn't
// require a line break after the OFX headers, but it is allowed, and will be
// optionally captured & discarded by the trailing `\s*`. Valid SGML headers must
// always be present in exactly this order, so a regular expression is acceptable.
headerExp := regexp.MustCompile(
`OFXHEADER:\s*(?P<OFXHEADER>\d+)\s*` +
`DATA:\s*(?P<DATA>[A-Z]+)\s*` +
`VERSION:\s*(?P<VERSION>\d+)\s*` +
`SECURITY:\s*(?P<SECURITY>[\w]+)\s*` +
`ENCODING:\s*(?P<ENCODING>[A-Z0-9-]+)\s*` +
`CHARSET:\s*(?P<CHARSET>[\w-]+)\s*` +
`COMPRESSION:\s*(?P<COMPRESSION>[A-Z]+)\s*` +
`OLDFILEUID:\s*(?P<OLDFILEUID>[\w-]+)\s*` +
`NEWFILEUID:\s*(?P<NEWFILEUID>[\w-]+)\s*`)
matches := headerExp.FindStringSubmatch(s)
if len(matches) == 0 {
return errors.New("OFX headers malformed")
}
for i, name := range headerExp.SubexpNames() {
if i == 0 {
continue
}
line, err := r.ReadString('\n')
if err != nil {
return err
}
// r.ReadString leaves the '\n' on the end...
line = strings.TrimSpace(line)
if len(line) == 0 {
if seenHeader {
break
} else {
continue
}
}
header := strings.SplitN(line, ":", 2)
if header == nil || len(header) != 2 {
return errors.New("OFX headers malformed")
}
// Some OFX servers put a space after the colon
headervalue := strings.TrimSpace(header[1])
switch header[0] {
headerValue := matches[i]
switch name {
case "OFXHEADER":
if headervalue != "100" {
if headerValue != "100" {
return errors.New("OFXHEADER is not 100")
}
seenHeader = true
case "DATA":
if headervalue != "OFXSGML" {
if headerValue != "OFXSGML" {
return errors.New("OFX DATA header does not contain OFXSGML")
}
case "VERSION":
err := or.Version.FromString(headervalue)
err := or.Version.FromString(headerValue)
if err != nil {
return err
}
seenVersion = true
if or.Version > OfxVersion160 {
return errors.New("OFX VERSION > 160 in SGML header")
}
case "SECURITY":
if headervalue != "NONE" {
if headerValue != "NONE" {
return errors.New("OFX SECURITY header not NONE")
}
case "COMPRESSION":
if headervalue != "NONE" {
if headerValue != "NONE" {
return errors.New("OFX COMPRESSION header not NONE")
}
case "ENCODING", "CHARSET", "OLDFILEUID", "NEWFILEUID":
// TODO check/handle these headers?
default:
return errors.New("Invalid OFX header: " + header[0])
// TODO: check/handle these headers?
}
}
if !seenVersion {
return errors.New("OFX VERSION header missing")
}
return nil
}

View File

@ -0,0 +1 @@
OFXHEADER:100DATA:OFXSGMLVERSION:102SECURITY:NONEENCODING:USASCIICHARSET:1252COMPRESSION:NONEOLDFILEUID:NONENEWFILEUID:NONE<OFX><SIGNONMSGSRSV1><SONRS><STATUS><CODE>0<SEVERITY>INFO<MESSAGE>SUCCESS</STATUS><DTSERVER>20210102211014.201[-8:PST]<LANGUAGE>ENG<FI><ORG>WF<FID>1000</FI><SESSCOOKIE>abc-123<INTU.BID>1000<INTU.USERID>jane_doe</SONRS></SIGNONMSGSRSV1><BANKMSGSRSV1><STMTTRNRS><TRNUID>0<STATUS><CODE>0<SEVERITY>INFO<MESSAGE>SUCCESS</STATUS><STMTRS><CURDEF>USD<BANKACCTFROM><BANKID>123456789<ACCTID>9876543210<ACCTTYPE>CHECKING</BANKACCTFROM><BANKTRANLIST><DTSTART>20201201120000.000[-8:PST]<DTEND>20201231120000.000[-8:PST]<STMTTRN><TRNTYPE>DIRECTDEBIT<DTPOSTED>20201201120000.000[-8:PST]<TRNAMT>-12.34<FITID>202012011<NAME>AE Visa Card AE EPAY<MEMO> XXXXX1234</STMTTRN></BANKTRANLIST><LEDGERBAL><BALAMT>123.45<DTASOF>20201231120000.000[-8:PST]</LEDGERBAL><AVAILBAL><BALAMT>123.45<DTASOF>20201231120000.000[-8:PST]</AVAILBAL></STMTRS></STMTTRNRS></BANKMSGSRSV1></OFX>