From 64e49ca573272abcb729e09a8160119bf80735fd Mon Sep 17 00:00:00 2001 From: JF Technology <57098161+jf-tech@users.noreply.github.com> Date: Fri, 21 Feb 2025 20:57:36 +1300 Subject: [PATCH] ISSUE 227: add `parser_settings.debug` flag and enable `csv2` reader to optionally inject debug (line) info into record IDR. Decided to make the debug flag a global setting in `parser_settings` and to keep its type an int (>= 0) for future flexibility. A string/enum would have been more strictly defined, but since this is an advanced setting with very little current usage, it is left flexible until further requirements arise. For the `csv2` reader, if `parser_settings.debug` is 0 or omitted, which is the case for the vast majority of existing and future `csv2` schemas, there is no behavior change; if `parser_settings.debug` is non-zero, a `__debug` node is added to the record IDR structure, under which currently only the `line_num` debug info is added. This design is flexible enough for future adoption by all other file format readers, yet has zero impact on any existing schemas. 
--- doc/csv2_in_depth.md | 21 +++++ extensions/omniv21/fileformat/csv/format.go | 6 +- .../omniv21/fileformat/csv/format_test.go | 2 + extensions/omniv21/fileformat/edi/format.go | 6 +- .../omniv21/fileformat/edi/format_test.go | 4 +- extensions/omniv21/fileformat/fileformat.go | 6 +- .../omniv21/fileformat/fixedlength/format.go | 6 +- .../fileformat/fixedlength/format_test.go | 2 + .../csv/.snapshots/TestRead-multiple_records | 78 ++++++++++++++++++- .../omniv21/fileformat/flatfile/csv/format.go | 8 +- .../fileformat/flatfile/csv/format_test.go | 3 + .../omniv21/fileformat/flatfile/csv/reader.go | 46 ++++++++--- .../fileformat/flatfile/csv/reader_test.go | 9 ++- .../fileformat/flatfile/fixedlength/format.go | 6 +- .../flatfile/fixedlength/format_test.go | 3 + .../flatfile/fixedlength/reader_test.go | 3 +- extensions/omniv21/fileformat/json/format.go | 6 +- .../omniv21/fileformat/json/format_test.go | 5 +- extensions/omniv21/fileformat/xml/format.go | 6 +- .../omniv21/fileformat/xml/format_test.go | 5 +- .../samples/csv2/.snapshots/Test1_Single_Row | 15 ++-- .../samples/csv2/1_single_row.schema.json | 4 +- .../jsonlog/jsonlogformat/jsonlogformat.go | 6 +- extensions/omniv21/schemahandler.go | 2 +- extensions/omniv21/schemahandler_test.go | 20 +++-- header/header.go | 4 +- validation/parserSettings.go | 3 +- validation/parserSettings.json | 3 +- 28 files changed, 240 insertions(+), 48 deletions(-) diff --git a/doc/csv2_in_depth.md b/doc/csv2_in_depth.md index 8f93940..1ce1679 100644 --- a/doc/csv2_in_depth.md +++ b/doc/csv2_in_depth.md @@ -486,6 +486,27 @@ Then the output for the above sample input will be: ] ``` +## Debug Info +If debug information is desired to be added into each record's IDR therefore becoming available for +`transform_declarations` use, one can specify `"debug": 1` in `parser_settings` section of the schema: +``` +{ + "parser_settings": { + "version": "omni.2.1", + "file_format_type": "csv2", + "debug": 1 + }, + ... 
+} +``` +If the `csv2` parser detects `"debug": 1` setting, it will add the following debug info into the +current record's IDR structure: + +- xpath: `__debug/line_num`: contains the starting line number in the CSV file of the current +record. + +Check this [sample](../extensions/omniv21/samples/csv2/1_single_row.schema.json) for usage pattern. + ## Migration from `'csv'` Schemas If one looks at the documentation for the old `csv` schema [here](./csv_in_depth.md), you notice diff --git a/extensions/omniv21/fileformat/csv/format.go b/extensions/omniv21/fileformat/csv/format.go index 778148d..c94da4f 100644 --- a/extensions/omniv21/fileformat/csv/format.go +++ b/extensions/omniv21/fileformat/csv/format.go @@ -13,6 +13,7 @@ import ( "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/validation" ) @@ -97,7 +98,10 @@ func (f *csvFileFormat) validateColumns(columns []Column) error { } func (f *csvFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { csv := runtime.(*csvFormatRuntime) return NewReader(name, r, csv.Decl, csv.XPath) } diff --git a/extensions/omniv21/fileformat/csv/format_test.go b/extensions/omniv21/fileformat/csv/format_test.go index ca9ab6a..8a86fd4 100644 --- a/extensions/omniv21/fileformat/csv/format_test.go +++ b/extensions/omniv21/fileformat/csv/format_test.go @@ -13,6 +13,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -157,6 +158,7 @@ func TestValidateSchema(t *testing.T) { func TestCreateFormatReader(t *testing.T) { r, err 
:= NewCSVFileFormat("test").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader( lf("A|B|C")+ diff --git a/extensions/omniv21/fileformat/edi/format.go b/extensions/omniv21/fileformat/edi/format.go index 0b55a26..92288cd 100644 --- a/extensions/omniv21/fileformat/edi/format.go +++ b/extensions/omniv21/fileformat/edi/format.go @@ -13,6 +13,7 @@ import ( "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/validation" ) @@ -74,7 +75,10 @@ func (f *ediFileFormat) validateFileDecl(decl *FileDecl) error { } func (f *ediFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { edi := runtime.(*ediFormatRuntime) return NewReader(name, r, edi.Decl, edi.XPath) } diff --git a/extensions/omniv21/fileformat/edi/format_test.go b/extensions/omniv21/fileformat/edi/format_test.go index b69ffde..3cabc20 100644 --- a/extensions/omniv21/fileformat/edi/format_test.go +++ b/extensions/omniv21/fileformat/edi/format_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -172,7 +173,8 @@ func TestCreateFormatReader(t *testing.T) { }` rt, err := format.ValidateSchema(fileFormatEDI, []byte(fileDecl), &transform.Decl{XPath: strs.StrPtr(".")}) assert.NoError(t, err) - reader, err := format.CreateFormatReader("test", strings.NewReader("ISA*e1*e2*e3\nISA*e4*e5*e6\n"), rt) + reader, err := format.CreateFormatReader( + header.Header{}, "test", strings.NewReader("ISA*e1*e2*e3\nISA*e4*e5*e6\n"), rt) assert.NoError(t, err) n, err := reader.Read() 
assert.NoError(t, err) diff --git a/extensions/omniv21/fileformat/fileformat.go b/extensions/omniv21/fileformat/fileformat.go index bdcac85..f1a555c 100644 --- a/extensions/omniv21/fileformat/fileformat.go +++ b/extensions/omniv21/fileformat/fileformat.go @@ -5,6 +5,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -18,7 +19,10 @@ type FileFormat interface { // CreateFormatReader creates an FormatReader which reads records of input data for this file format. CreateFormatReader( - inputName string, input io.Reader, formatRuntime interface{}) (FormatReader, error) + schemaHeader header.Header, + inputName string, + input io.Reader, + formatRuntime interface{}) (FormatReader, error) } // FormatReader is an interface for reading a specific input format in omni schema handler. We'll have diff --git a/extensions/omniv21/fileformat/fixedlength/format.go b/extensions/omniv21/fileformat/fixedlength/format.go index 21521e4..0c4de1d 100644 --- a/extensions/omniv21/fileformat/fixedlength/format.go +++ b/extensions/omniv21/fileformat/fixedlength/format.go @@ -14,6 +14,7 @@ import ( "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/validation" ) @@ -128,7 +129,10 @@ func (f *fixedLengthFileFormat) validateColumns(cols []*ColumnDecl) error { } func (f *fixedLengthFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { rt := runtime.(*fixedLengthFormatRuntime) return NewReader(name, r, rt.Decl, rt.XPath) } diff --git 
a/extensions/omniv21/fileformat/fixedlength/format_test.go b/extensions/omniv21/fileformat/fixedlength/format_test.go index e94f480..35ae9f0 100644 --- a/extensions/omniv21/fileformat/fixedlength/format_test.go +++ b/extensions/omniv21/fileformat/fixedlength/format_test.go @@ -13,6 +13,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -308,6 +309,7 @@ func TestValidateSchema(t *testing.T) { func TestCreateFormatReader(t *testing.T) { r, err := NewFixedLengthFileFormat("test").CreateFormatReader( + header.Header{}, "test", strings.NewReader("abcd\n1234\n"), &fixedLengthFormatRuntime{ diff --git a/extensions/omniv21/fileformat/flatfile/csv/.snapshots/TestRead-multiple_records b/extensions/omniv21/fileformat/flatfile/csv/.snapshots/TestRead-multiple_records index 515f8af..70cbf61 100644 --- a/extensions/omniv21/fileformat/flatfile/csv/.snapshots/TestRead-multiple_records +++ b/extensions/omniv21/fileformat/flatfile/csv/.snapshots/TestRead-multiple_records @@ -1,5 +1,40 @@ { "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "1", + "FirstChild": null, + "FormatSpecific": null, + "LastChild": null, + "NextSibling": null, + "Parent": "(ElementNode line_num)", + "PrevSibling": null, + "Type": "TextNode" + } + ], + "Data": "line_num", + "FirstChild": "(TextNode '1')", + "FormatSpecific": null, + "LastChild": "(TextNode '1')", + "NextSibling": null, + "Parent": "(ElementNode __debug)", + "PrevSibling": null, + "Type": "ElementNode" + } + ], + "Data": "__debug", + "FirstChild": "(ElementNode line_num)", + "FormatSpecific": null, + "LastChild": "(ElementNode line_num)", + "NextSibling": "(ElementNode r1c1)", + "Parent": "(ElementNode r1)", + "PrevSibling": null, + "Type": "ElementNode" + }, { "Children": [ { @@ -20,7 +55,7 @@ "LastChild": "(TextNode 'v1')", "NextSibling": "(ElementNode r1c2)", 
"Parent": "(ElementNode r1)", - "PrevSibling": null, + "PrevSibling": "(ElementNode __debug)", "Type": "ElementNode" }, { @@ -48,7 +83,7 @@ } ], "Data": "r1", - "FirstChild": "(ElementNode r1c1)", + "FirstChild": "(ElementNode __debug)", "FormatSpecific": null, "LastChild": "(ElementNode r1c2)", "NextSibling": null, @@ -58,6 +93,41 @@ }, { "Children": [ + { + "Children": [ + { + "Children": [ + { + "Children": null, + "Data": "3", + "FirstChild": null, + "FormatSpecific": null, + "LastChild": null, + "NextSibling": null, + "Parent": "(ElementNode line_num)", + "PrevSibling": null, + "Type": "TextNode" + } + ], + "Data": "line_num", + "FirstChild": "(TextNode '3')", + "FormatSpecific": null, + "LastChild": "(TextNode '3')", + "NextSibling": null, + "Parent": "(ElementNode __debug)", + "PrevSibling": null, + "Type": "ElementNode" + } + ], + "Data": "__debug", + "FirstChild": "(ElementNode line_num)", + "FormatSpecific": null, + "LastChild": "(ElementNode line_num)", + "NextSibling": "(ElementNode r1c1)", + "Parent": "(ElementNode r1)", + "PrevSibling": null, + "Type": "ElementNode" + }, { "Children": [ { @@ -78,12 +148,12 @@ "LastChild": "(TextNode '')", "NextSibling": null, "Parent": "(ElementNode r1)", - "PrevSibling": null, + "PrevSibling": "(ElementNode __debug)", "Type": "ElementNode" } ], "Data": "r1", - "FirstChild": "(ElementNode r1c1)", + "FirstChild": "(ElementNode __debug)", "FormatSpecific": null, "LastChild": "(ElementNode r1c1)", "NextSibling": null, diff --git a/extensions/omniv21/fileformat/flatfile/csv/format.go b/extensions/omniv21/fileformat/flatfile/csv/format.go index c876093..6455089 100644 --- a/extensions/omniv21/fileformat/flatfile/csv/format.go +++ b/extensions/omniv21/fileformat/flatfile/csv/format.go @@ -14,6 +14,7 @@ import ( "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation" + 
"github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/validation" ) @@ -76,7 +77,10 @@ func (f *csvFormat) validateFileDecl(decl *FileDecl) error { } func (f *csvFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + schemaHeader header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { rt := runtime.(*csvFormatRuntime) targetXPathExpr, err := func() (*xpath.Expr, error) { if rt.XPath == "" || rt.XPath == "." { @@ -87,7 +91,7 @@ func (f *csvFormat) CreateFormatReader( if err != nil { return nil, f.FmtErr("xpath '%s' on 'FINAL_OUTPUT' is invalid: %s", rt.XPath, err.Error()) } - return NewReader(name, r, rt.Decl, targetXPathExpr), nil + return NewReader(schemaHeader, name, r, rt.Decl, targetXPathExpr), nil } func (f *csvFormat) FmtErr(format string, args ...interface{}) error { diff --git a/extensions/omniv21/fileformat/flatfile/csv/format_test.go b/extensions/omniv21/fileformat/flatfile/csv/format_test.go index 201fe3c..4618251 100644 --- a/extensions/omniv21/fileformat/flatfile/csv/format_test.go +++ b/extensions/omniv21/fileformat/flatfile/csv/format_test.go @@ -12,6 +12,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -284,6 +285,7 @@ func TestCreateFormatReader(t *testing.T) { &transform.Decl{XPath: finalOutputXPath}) assert.NoError(t, err) reader, err := format.CreateFormatReader( + header.Header{}, "test-input", strings.NewReader("abcd|efgh|jklm\n123|456|789\n"), runtime) @@ -301,6 +303,7 @@ func TestCreateFormatReader(t *testing.T) { // test CreateFormatReader called with invalid target xpath. 
reader, err := NewCSVFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader("abcd\n1234\n"), &csvFormatRuntime{XPath: "["}) diff --git a/extensions/omniv21/fileformat/flatfile/csv/reader.go b/extensions/omniv21/fileformat/flatfile/csv/reader.go index 9bc0ff8..54b0c04 100644 --- a/extensions/omniv21/fileformat/flatfile/csv/reader.go +++ b/extensions/omniv21/fileformat/flatfile/csv/reader.go @@ -4,11 +4,13 @@ import ( "errors" "fmt" "io" + "strconv" "github.com/antchfx/xpath" "github.com/jf-tech/go-corelib/ios" "github.com/jf-tech/omniparser/extensions/omniv21/fileformat/flatfile" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -18,18 +20,28 @@ type line struct { raw string } +const ( + debug = "__debug" + debug_line_num = "line_num" +) + type reader struct { - inputName string - fileDecl *FileDecl - r *ios.LineNumReportingCsvReader - hr *flatfile.HierarchyReader - linesBuf []line // linesBuf contains all the unprocessed lines - records []string + schemaHeader header.Header + inputName string + fileDecl *FileDecl + r *ios.LineNumReportingCsvReader + hr *flatfile.HierarchyReader + linesBuf []line // linesBuf contains all the unprocessed lines + records []string } // NewReader creates an FormatReader for csv file format. func NewReader( - inputName string, r io.Reader, decl *FileDecl, targetXPathExpr *xpath.Expr) *reader { + schemaHeader header.Header, + inputName string, + r io.Reader, + decl *FileDecl, + targetXPathExpr *xpath.Expr) *reader { if decl.ReplaceDoubleQuotes { r = ios.NewBytesReplacingReader(r, []byte(`"`), []byte(`'`)) } @@ -43,9 +55,10 @@ func NewReader( // those record string references down: reader.records[]. 
csv.ReuseRecord = true reader := &reader{ - inputName: inputName, - fileDecl: decl, - r: csv, + schemaHeader: schemaHeader, + inputName: inputName, + fileDecl: decl, + r: csv, } reader.hr = flatfile.NewHierarchyReader( toFlatFileRecDecls(decl.Records), reader, targetXPathExpr) @@ -186,6 +199,7 @@ func (r *reader) linesToNode(decl *RecordDecl, n int) *idr.Node { "linesBuf has %d lines but requested %d lines to convert", len(r.linesBuf), n)) } node := idr.CreateNode(idr.ElementNode, decl.Name) + r.insertDebugInfo(node) for col := range decl.Columns { colDecl := decl.Columns[col] for i := 0; i < n; i++ { @@ -203,6 +217,18 @@ func (r *reader) linesToNode(decl *RecordDecl, n int) *idr.Node { return node } +func (r *reader) insertDebugInfo(node *idr.Node) { + if r.schemaHeader.ParserSettings.Debug == 0 { + return + } + debugNode := idr.CreateNode(idr.ElementNode, debug) + idr.AddChild(node, debugNode) + lineNumNode := idr.CreateNode(idr.ElementNode, debug_line_num) + idr.AddChild(debugNode, lineNumNode) + lineNumValNode := idr.CreateNode(idr.TextNode, strconv.Itoa(r.linesBuf[0].lineNum)) + idr.AddChild(lineNumNode, lineNumValNode) +} + func (r *reader) popFrontLinesBuf(n int) { if n > len(r.linesBuf) { panic(fmt.Sprintf( diff --git a/extensions/omniv21/fileformat/flatfile/csv/reader_test.go b/extensions/omniv21/fileformat/flatfile/csv/reader_test.go index aa35660..2c25c05 100644 --- a/extensions/omniv21/fileformat/flatfile/csv/reader_test.go +++ b/extensions/omniv21/fileformat/flatfile/csv/reader_test.go @@ -14,6 +14,7 @@ import ( "github.com/jf-tech/go-corelib/ios" "github.com/jf-tech/go-corelib/strs" "github.com/jf-tech/go-corelib/testlib" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" "github.com/stretchr/testify/assert" ) @@ -25,6 +26,7 @@ func lf(s string) string { func TestRead(t *testing.T) { for _, test := range []struct { name string + debug int fileDecl string targetXPath string input io.Reader @@ -72,7 +74,8 @@ func TestRead(t 
*testing.T) { }, }, { - name: "multiple records", + name: "multiple records", + debug: 1, fileDecl: `{ "delimiter": ",", "records": [ @@ -125,7 +128,9 @@ func TestRead(t *testing.T) { targetXPathExpr, err = caches.GetXPathExpr(test.targetXPath) assert.NoError(t, err) } - r := NewReader("test-input", test.input, &fd, targetXPathExpr) + r := NewReader( + header.Header{ParserSettings: header.ParserSettings{Debug: test.debug}}, + "test-input", test.input, &fd, targetXPathExpr) var nodes []string for _, expErr := range test.expErrs { node, err := r.Read() diff --git a/extensions/omniv21/fileformat/flatfile/fixedlength/format.go b/extensions/omniv21/fileformat/flatfile/fixedlength/format.go index 7893be4..cd9a1d9 100644 --- a/extensions/omniv21/fileformat/flatfile/fixedlength/format.go +++ b/extensions/omniv21/fileformat/flatfile/fixedlength/format.go @@ -14,6 +14,7 @@ import ( "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" v21validation "github.com/jf-tech/omniparser/extensions/omniv21/validation" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/validation" ) @@ -76,7 +77,10 @@ func (f *fixedLengthFormat) validateFileDecl(decl *FileDecl) error { } func (f *fixedLengthFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { rt := runtime.(*fixedLengthFormatRuntime) targetXPathExpr, err := func() (*xpath.Expr, error) { if rt.XPath == "" || rt.XPath == "." 
{ diff --git a/extensions/omniv21/fileformat/flatfile/fixedlength/format_test.go b/extensions/omniv21/fileformat/flatfile/fixedlength/format_test.go index 651cfb3..e71e756 100644 --- a/extensions/omniv21/fileformat/flatfile/fixedlength/format_test.go +++ b/extensions/omniv21/fileformat/flatfile/fixedlength/format_test.go @@ -12,6 +12,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -269,6 +270,7 @@ func TestCreateFormatReader(t *testing.T) { &transform.Decl{XPath: finalOutputXPath}) assert.NoError(t, err) reader, err := format.CreateFormatReader( + header.Header{}, "test-input", strings.NewReader("abcd\n1234\n"), runtime) @@ -286,6 +288,7 @@ func TestCreateFormatReader(t *testing.T) { // test CreateFormatReader called with invalid target xpath. reader, err := NewFixedLengthFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader("abcd\n1234\n"), &fixedLengthFormatRuntime{XPath: "["}) diff --git a/extensions/omniv21/fileformat/flatfile/fixedlength/reader_test.go b/extensions/omniv21/fileformat/flatfile/fixedlength/reader_test.go index 4639c6c..19cf2ec 100644 --- a/extensions/omniv21/fileformat/flatfile/fixedlength/reader_test.go +++ b/extensions/omniv21/fileformat/flatfile/fixedlength/reader_test.go @@ -12,6 +12,7 @@ import ( "github.com/jf-tech/go-corelib/strs" "github.com/jf-tech/go-corelib/testlib" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" "github.com/stretchr/testify/assert" ) @@ -81,7 +82,7 @@ func TestRead(t *testing.T) { }, } { t.Run(test.name, func(t *testing.T) { - r, err := format.CreateFormatReader("test-input", test.r, rt) + r, err := format.CreateFormatReader(header.Header{}, "test-input", test.r, rt) assert.NoError(t, err) n, err := r.Read() if strs.IsStrNonBlank(test.err) { 
diff --git a/extensions/omniv21/fileformat/json/format.go b/extensions/omniv21/fileformat/json/format.go index ed762d8..72f9bd2 100644 --- a/extensions/omniv21/fileformat/json/format.go +++ b/extensions/omniv21/fileformat/json/format.go @@ -10,6 +10,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" ) const ( @@ -41,7 +42,10 @@ func (f *jsonFileFormat) ValidateSchema(format string, _ []byte, finalOutputDecl } func (f *jsonFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { return NewReader(name, r, runtime.(string)) } diff --git a/extensions/omniv21/fileformat/json/format_test.go b/extensions/omniv21/fileformat/json/format_test.go index d65443c..ab1d496 100644 --- a/extensions/omniv21/fileformat/json/format_test.go +++ b/extensions/omniv21/fileformat/json/format_test.go @@ -11,6 +11,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -74,6 +75,7 @@ func TestValidateSchema(t *testing.T) { func TestCreateFormatReader(t *testing.T) { r, err := NewJSONFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader(`["B1", "B2", "B3"]`), "/*[.!='B2']") @@ -96,7 +98,8 @@ func TestCreateFormatReader(t *testing.T) { assert.Nil(t, n3) }) - r, err = NewJSONFileFormat("test-schema").CreateFormatReader("test-input", strings.NewReader(""), "[invalid") + r, err = NewJSONFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader(""), "[invalid") assert.Error(t, err) assert.Equal(t, `invalid xpath '[invalid', err: expression must evaluate to a 
node-set`, err.Error()) assert.Nil(t, r) diff --git a/extensions/omniv21/fileformat/xml/format.go b/extensions/omniv21/fileformat/xml/format.go index de7f4c1..6aea047 100644 --- a/extensions/omniv21/fileformat/xml/format.go +++ b/extensions/omniv21/fileformat/xml/format.go @@ -10,6 +10,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" ) const ( @@ -41,7 +42,10 @@ func (f *xmlFileFormat) ValidateSchema(format string, _ []byte, finalOutputDecl } func (f *xmlFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { return NewReader(name, r, runtime.(string)) } diff --git a/extensions/omniv21/fileformat/xml/format_test.go b/extensions/omniv21/fileformat/xml/format_test.go index b432c72..863f5c9 100644 --- a/extensions/omniv21/fileformat/xml/format_test.go +++ b/extensions/omniv21/fileformat/xml/format_test.go @@ -11,6 +11,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" "github.com/jf-tech/omniparser/idr" ) @@ -74,6 +75,7 @@ func TestValidateSchema(t *testing.T) { func TestCreateFormatReader(t *testing.T) { r, err := NewXMLFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader(`data1skipdata2`), "/A/B[.!='skip']") @@ -96,7 +98,8 @@ func TestCreateFormatReader(t *testing.T) { assert.Nil(t, n3) }) - r, err = NewXMLFileFormat("test-schema").CreateFormatReader("test-input", strings.NewReader(""), "[invalid") + r, err = NewXMLFileFormat("test-schema").CreateFormatReader( + header.Header{}, "test-input", strings.NewReader(""), "[invalid") assert.Error(t, err) assert.Equal(t, `invalid xpath '[invalid', err: 
expression must evaluate to a node-set`, err.Error()) assert.Nil(t, r) diff --git a/extensions/omniv21/samples/csv2/.snapshots/Test1_Single_Row b/extensions/omniv21/samples/csv2/.snapshots/Test1_Single_Row index 42e2089..c44cdba 100644 --- a/extensions/omniv21/samples/csv2/.snapshots/Test1_Single_Row +++ b/extensions/omniv21/samples/csv2/.snapshots/Test1_Single_Row @@ -1,8 +1,9 @@ [ { - "RawRecord": "{\"DATE\":\"2019/01/31T12:34:56-0800\",\"HIGH_TEMP_C\":\"10.5\",\"LAT\":\"37.7749\",\"LONG\":\"122.4194\",\"LOW_TEMP_F\":\"30.2\",\"NOTE\":\"note 1\",\"UV_INDEX\":\"12/4/6\",\"WIND_DIR\":\"N\",\"WIND_SPEED_KMH\":\"33\"}", - "RawRecordHash": "24a341e6-bdac-3319-ac76-7354d42a7402", + "RawRecord": "{\"DATE\":\"2019/01/31T12:34:56-0800\",\"HIGH_TEMP_C\":\"10.5\",\"LAT\":\"37.7749\",\"LONG\":\"122.4194\",\"LOW_TEMP_F\":\"30.2\",\"NOTE\":\"note 1\",\"UV_INDEX\":\"12/4/6\",\"WIND_DIR\":\"N\",\"WIND_SPEED_KMH\":\"33\",\"__debug\":{\"line_num\":\"2\"}}", + "RawRecordHash": "25e5f4d9-4b1b-3038-9b06-7fbaa622dd33", "TransformedRecord": { + "__debug_line_num": "2", "date": "2019-01-31T12:34:56-08:00", "high_temperature_fahrenheit": 50.9, "latitude": 37.7749, @@ -18,9 +19,10 @@ } }, { - "RawRecord": "{\"DATE\":\"2020/07/31T01:23:45-0500\",\"HIGH_TEMP_C\":\"39\",\"LAT\":\"32.7767\",\"LONG\":\"96.7970\",\"LOW_TEMP_F\":\"95\",\"NOTE\":\"' note with bad quotes\",\"UV_INDEX\":\"9/5/6\",\"WIND_DIR\":\"SE\",\"WIND_SPEED_KMH\":\"8\"}", - "RawRecordHash": "dba160be-3cfe-3efc-a891-f76461c37c08", + "RawRecord": "{\"DATE\":\"2020/07/31T01:23:45-0500\",\"HIGH_TEMP_C\":\"39\",\"LAT\":\"32.7767\",\"LONG\":\"96.7970\",\"LOW_TEMP_F\":\"95\",\"NOTE\":\"' note with bad quotes\",\"UV_INDEX\":\"9/5/6\",\"WIND_DIR\":\"SE\",\"WIND_SPEED_KMH\":\"8\",\"__debug\":{\"line_num\":\"4\"}}", + "RawRecordHash": "7da1f5c3-9cde-35d7-8c48-3ac8cf900328", "TransformedRecord": { + "__debug_line_num": "4", "date": "2020-07-31T01:23:45-05:00", "high_temperature_fahrenheit": 102.2, "latitude": 32.7767, @@ -36,9 +38,10 @@ } 
}, { - "RawRecord": "{\"DATE\":\"2030/11/22T20:18:00-0500\",\"HIGH_TEMP_C\":\"15.5\",\"LAT\":\"39.0997\",\"LONG\":\"94.5786\",\"LOW_TEMP_F\":\"17\",\"NOTE\":\"note 3\",\"UV_INDEX\":\"10/3/4\",\"WIND_DIR\":\"X\",\"WIND_SPEED_KMH\":\"180\"}", - "RawRecordHash": "fcdd707d-1ed4-3641-aca3-b0df568b1084", + "RawRecord": "{\"DATE\":\"2030/11/22T20:18:00-0500\",\"HIGH_TEMP_C\":\"15.5\",\"LAT\":\"39.0997\",\"LONG\":\"94.5786\",\"LOW_TEMP_F\":\"17\",\"NOTE\":\"note 3\",\"UV_INDEX\":\"10/3/4\",\"WIND_DIR\":\"X\",\"WIND_SPEED_KMH\":\"180\",\"__debug\":{\"line_num\":\"5\"}}", + "RawRecordHash": "d33d434a-f28c-355f-8de2-0ed8859fb0e7", "TransformedRecord": { + "__debug_line_num": "5", "date": "2030-11-22T20:18:00-05:00", "high_temperature_fahrenheit": 59.9, "latitude": 39.0997, diff --git a/extensions/omniv21/samples/csv2/1_single_row.schema.json b/extensions/omniv21/samples/csv2/1_single_row.schema.json index c310331..beb4af7 100644 --- a/extensions/omniv21/samples/csv2/1_single_row.schema.json +++ b/extensions/omniv21/samples/csv2/1_single_row.schema.json @@ -1,7 +1,8 @@ { "parser_settings": { "version": "omni.2.1", - "file_format_type": "csv2" + "file_format_type": "csv2", + "debug": 1 }, "file_declaration": { "delimiter": "|", @@ -29,6 +30,7 @@ }, "transform_declarations": { "FINAL_OUTPUT": { "xpath": ".[DATE != 'N/A']" ,"object": { + "__debug_line_num": { "xpath": "__debug/line_num" }, "uv_index": { "custom_func": { "name": "javascript", diff --git a/extensions/omniv21/samples/customfileformats/jsonlog/jsonlogformat/jsonlogformat.go b/extensions/omniv21/samples/customfileformats/jsonlog/jsonlogformat/jsonlogformat.go index 3cfb1b0..7bb1352 100644 --- a/extensions/omniv21/samples/customfileformats/jsonlog/jsonlogformat/jsonlogformat.go +++ b/extensions/omniv21/samples/customfileformats/jsonlog/jsonlogformat/jsonlogformat.go @@ -10,6 +10,7 @@ import ( "github.com/jf-tech/omniparser/errs" "github.com/jf-tech/omniparser/extensions/omniv21/fileformat" 
"github.com/jf-tech/omniparser/extensions/omniv21/transform" + "github.com/jf-tech/omniparser/header" ) const ( @@ -46,7 +47,10 @@ func (p *jsonLogFileFormat) ValidateSchema( } func (p *jsonLogFileFormat) CreateFormatReader( - name string, r io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + _ header.Header, + name string, + r io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { return NewReader(name, r, runtime.(string)) } diff --git a/extensions/omniv21/schemahandler.go b/extensions/omniv21/schemahandler.go index dfd2118..0592918 100644 --- a/extensions/omniv21/schemahandler.go +++ b/extensions/omniv21/schemahandler.go @@ -115,7 +115,7 @@ type schemaHandler struct { } func (h *schemaHandler) NewIngester(ctx *transformctx.Ctx, input io.Reader) (schemahandler.Ingester, error) { - reader, err := h.fileFormat.CreateFormatReader(ctx.InputName, input, h.formatRuntime) + reader, err := h.fileFormat.CreateFormatReader(h.ctx.Header, ctx.InputName, input, h.formatRuntime) if err != nil { return nil, err } diff --git a/extensions/omniv21/schemahandler_test.go b/extensions/omniv21/schemahandler_test.go index 5d6e5f4..be6c0d0 100644 --- a/extensions/omniv21/schemahandler_test.go +++ b/extensions/omniv21/schemahandler_test.go @@ -34,21 +34,26 @@ func (f testFileFormat) ValidateSchema(_ string, _ []byte, _ *transform.Decl) (i } func (f testFileFormat) CreateFormatReader( - inputName string, input io.Reader, runtime interface{}) (fileformat.FormatReader, error) { + schemaHeader header.Header, + inputName string, + input io.Reader, + runtime interface{}) (fileformat.FormatReader, error) { if f.createFormatReaderErr != nil { return nil, f.createFormatReaderErr } return testFormatReader{ - inputName: inputName, - input: input, - runtime: runtime, + schemaHeader: schemaHeader, + inputName: inputName, + input: input, + runtime: runtime, }, nil } type testFormatReader struct { - inputName string - input io.Reader - runtime interface{} + schemaHeader 
header.Header + inputName string + input io.Reader + runtime interface{} } func (r testFormatReader) Read() (*idr.Node, error) { panic("implement me") } @@ -237,6 +242,7 @@ func TestCreateHandler_CustomParseFuncs_Success(t *testing.T) { func TestNewIngester_CustomFileFormat_Failure(t *testing.T) { ip, err := (&schemaHandler{ + ctx: &schemahandler.CreateCtx{}, fileFormat: testFileFormat{ createFormatReaderErr: errors.New("failed to create reader"), }, diff --git a/header/header.go b/header/header.go index ed4c173..c8c52f0 100644 --- a/header/header.go +++ b/header/header.go @@ -10,11 +10,13 @@ import ( // ParserSettings defines the common header (and its JSON format) for all schemas across all schema handlers. // It contains vital information about which handler a schema wants to use, and what file format the input // stream is of (e.g. fixed-length txt, CSV/TSV, XML, JSON, EDI, etc). Optionally, it specifies the expected -// encoding scheme for the input streams this schema is used for. +// encoding scheme for the input streams this schema is used for, as well as other auxiliary but global +// parser settings. 
type ParserSettings struct { Version string `json:"version,omitempty"` FileFormatType string `json:"file_format_type,omitempty"` Encoding *string `json:"encoding,omitempty"` + Debug int `json:"debug,omitempty"` } const ( diff --git a/validation/parserSettings.go b/validation/parserSettings.go index 52c1ccf..3e18322 100644 --- a/validation/parserSettings.go +++ b/validation/parserSettings.go @@ -19,7 +19,8 @@ const ( "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - } + }, + "debug": { "type": "integer", "minimum": 0 } }, "required": [ "version", "file_format_type" ], "additionalProperties": false diff --git a/validation/parserSettings.json b/validation/parserSettings.json index 0a3849f..5979e49 100644 --- a/validation/parserSettings.json +++ b/validation/parserSettings.json @@ -12,7 +12,8 @@ "encoding": { "type": "string", "enum": [ "utf-8", "iso-8859-1", "windows-1252" ] - } + }, + "debug": { "type": "integer", "minimum": 0 } }, "required": [ "version", "file_format_type" ], "additionalProperties": false