diff --git a/go.mod b/go.mod index e3424bb6e0..4fa7aeef9e 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,7 @@ require ( github.com/jpillora/longestcommon v0.0.0-20161227235612-adb9d91ee629 github.com/json-iterator/go v1.1.12 github.com/keboola/go-client v1.26.6 - github.com/keboola/go-utils v1.0.3 + github.com/keboola/go-utils v1.1.0 github.com/klauspost/compress v1.17.9 github.com/klauspost/pgzip v1.2.6 github.com/kylelemons/godebug v1.1.0 @@ -67,6 +67,7 @@ require ( github.com/umisama/go-regexpcache v0.0.0-20150417035358-2444a542492f github.com/urfave/negroni v1.0.0 github.com/valyala/fasthttp v1.55.0 + github.com/valyala/fastjson v1.6.4 github.com/writeas/go-strip-markdown v2.0.1+incompatible github.com/xtaci/kcp-go/v5 v5.6.8 go.etcd.io/etcd/api/v3 v3.5.14 diff --git a/go.sum b/go.sum index b1e822fefd..4817e8e758 100644 --- a/go.sum +++ b/go.sum @@ -420,8 +420,8 @@ github.com/keboola/go-mockoidc v0.0.0-20240405064136-5229d2b53db6 h1:HcvX1VQkiav github.com/keboola/go-mockoidc v0.0.0-20240405064136-5229d2b53db6/go.mod h1:eDjgYHYDJbPLBLsyZ6qRaugP0mX8vePOhZ5id1fdzJw= github.com/keboola/go-oauth2-proxy/v7 v7.6.1-0.20240418143152-9d00aaa29562 h1:EiwSnkbGt2i6XxvjDMrWx6/bGlQjVs+yq1mDJ5b3U1U= github.com/keboola/go-oauth2-proxy/v7 v7.6.1-0.20240418143152-9d00aaa29562/go.mod h1:uPrZkzwsuFyIPP04hIt6TG2KvWujglvkOnUUnQJyIdw= -github.com/keboola/go-utils v1.0.3 h1:8ybBfqsJC8k6Xrte6d6PBStehmMabbki2u0H6WI/cfc= -github.com/keboola/go-utils v1.0.3/go.mod h1:p+AIGpqlL7c0X+MWNOLdkAt2rMM5JXlycWwkOKmlrps= +github.com/keboola/go-utils v1.1.0 h1:2tJikVr1kESR88qeGy/Vtmendw7TXB1UrJKLfPKceSk= +github.com/keboola/go-utils v1.1.0/go.mod h1:4YVC2/V0QwgHqxtch8JAVDNVI1aINF2arJ7sh6TO1GY= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= @@ -662,6 +662,8 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8= github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM= +github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXVQ= +github.com/valyala/fastjson v1.6.4/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= diff --git a/internal/pkg/service/stream/mapping/recordctx/fasthttp.go b/internal/pkg/service/stream/mapping/recordctx/fasthttp.go index f34bf1063e..743af7f2a0 100644 --- a/internal/pkg/service/stream/mapping/recordctx/fasthttp.go +++ b/internal/pkg/service/stream/mapping/recordctx/fasthttp.go @@ -11,6 +11,7 @@ import ( "github.com/keboola/go-utils/pkg/orderedmap" "github.com/valyala/fasthttp" + "github.com/valyala/fastjson" "github.com/keboola/keboola-as-code/internal/pkg/utils/errors" ) @@ -27,6 +28,8 @@ type fastHTTPContext struct { bodyStringErr error bodyMap *orderedmap.OrderedMap bodyMapErr error + jsonValue *fastjson.Value + jsonValueErr error } func FromFastHTTP(ctx context.Context, timestamp time.Time, req *fasthttp.RequestCtx) Context { @@ -122,6 +125,28 @@ func (c *fastHTTPContext) BodyMap() (*orderedmap.OrderedMap, error) { return c.bodyMap, c.bodyMapErr } +func (c *fastHTTPContext) JSONValue(parserPool *fastjson.ParserPool) (*fastjson.Value, error) { + c.lock.Lock() + defer c.lock.Unlock() + + if c.jsonValue == nil && c.jsonValueErr == nil { + if body, err := c.BodyBytes(); err != nil { + c.jsonValueErr = err + } else { + parser := parserPool.Get() + defer parserPool.Put(parser) + + if jsonValue, err := parser.ParseBytes(body); err != nil { + c.jsonValueErr = errors.PrefixError(err, "cannot parse request json") + } else { + c.jsonValue = jsonValue + } + } + } + + return c.jsonValue, c.jsonValueErr +} + func (c *fastHTTPContext) headersToMap() *orderedmap.OrderedMap { out := orderedmap.New() for _, k := range c.req.Request.Header.PeekKeys() { diff --git a/internal/pkg/service/stream/mapping/recordctx/http.go b/internal/pkg/service/stream/mapping/recordctx/http.go index eab82113a8..cae554f2b8 100644 --- a/internal/pkg/service/stream/mapping/recordctx/http.go +++ b/internal/pkg/service/stream/mapping/recordctx/http.go @@ -11,6 +11,7 @@ import ( "time" "github.com/keboola/go-utils/pkg/orderedmap" + "github.com/valyala/fastjson" "github.com/keboola/keboola-as-code/internal/pkg/utils/errors" "github.com/keboola/keboola-as-code/internal/pkg/utils/ip" @@ -29,6 +30,8 @@ type httpContext struct { bodyBytesErr error bodyMap *orderedmap.OrderedMap bodyMapErr error + jsonValue *fastjson.Value + jsonValueErr error } func FromHTTP(timestamp time.Time, req *http.Request) Context { @@ -124,6 +127,28 @@ func (c *httpContext) BodyMap() (*orderedmap.OrderedMap, error) { return c.bodyMap, c.bodyMapErr } +func (c *httpContext) JSONValue(parserPool *fastjson.ParserPool) (*fastjson.Value, error) { + c.lock.Lock() + defer c.lock.Unlock() + + if c.jsonValue == nil && c.jsonValueErr == nil { + if body, err := c.bodyBytesWithoutLock(); err != nil { + c.jsonValueErr = err + } else { + parser := parserPool.Get() + defer parserPool.Put(parser) + + if jsonValue, err := parser.ParseBytes(body); err != nil { + c.jsonValueErr = errors.PrefixError(err, "cannot parse request json") + } else { + c.jsonValue = jsonValue + } + } + } + + return c.jsonValue, c.jsonValueErr +} + func (c *httpContext) bodyBytesWithoutLock() ([]byte, error) { if c.bodyBytes == nil && c.bodyBytesErr == nil { c.bodyBytes, c.bodyBytesErr = io.ReadAll(c.req.Body) diff --git a/internal/pkg/service/stream/mapping/recordctx/recordctx.go b/internal/pkg/service/stream/mapping/recordctx/recordctx.go index 68c5a42dc7..a629900f5f 100644 --- a/internal/pkg/service/stream/mapping/recordctx/recordctx.go +++ b/internal/pkg/service/stream/mapping/recordctx/recordctx.go @@ -7,6 +7,7 @@ import ( "time" "github.com/keboola/go-utils/pkg/orderedmap" + "github.com/valyala/fastjson" ) type Context interface { @@ -18,4 +19,5 @@ type Context interface { BodyString() (string, error) BodyBytes() ([]byte, error) BodyMap() (*orderedmap.OrderedMap, error) + JSONValue(*fastjson.ParserPool) (*fastjson.Value, error) } diff --git a/internal/pkg/service/stream/mapping/recordctx/utils.go b/internal/pkg/service/stream/mapping/recordctx/utils.go index a8fcf46500..2dd8e0e8b3 100644 --- a/internal/pkg/service/stream/mapping/recordctx/utils.go +++ b/internal/pkg/service/stream/mapping/recordctx/utils.go @@ -1,37 +1,27 @@ package recordctx import ( - "strings" - jsoniter "github.com/json-iterator/go" "github.com/keboola/go-utils/pkg/orderedmap" - "github.com/umisama/go-regexpcache" serviceError "github.com/keboola/keboola-as-code/internal/pkg/service/common/errors" "github.com/keboola/keboola-as-code/internal/pkg/utils/errors" + "github.com/keboola/keboola-as-code/internal/pkg/utils/httputils" utilsUrl "github.com/keboola/keboola-as-code/internal/pkg/utils/url" ) // json - replacement of the standard encoding/json library, it is faster for larger responses. var json = jsoniter.ConfigCompatibleWithStandardLibrary //nolint:gochecknoglobals -func isContentTypeJSON(t string) bool { - return regexpcache.MustCompile(`^application/([a-zA-Z0-9\.\-]+\+)?json$`).MatchString(t) -} - -func isContentTypeForm(t string) bool { - return strings.HasPrefix(t, "application/x-www-form-urlencoded") -} - func parseBody(contentType string, body []byte) (data *orderedmap.OrderedMap, err error) { // Decode switch { - case isContentTypeForm(contentType): + case httputils.IsContentTypeForm(contentType): data, err = utilsUrl.ParseQuery(string(body)) if err != nil { return nil, serviceError.NewBadRequestError(errors.Errorf("invalid form data: %w", err)) } - case isContentTypeJSON(contentType): + case httputils.IsContentTypeJSON(contentType): err = json.Unmarshal(body, &data) if err != nil { return nil, serviceError.NewBadRequestError(errors.Errorf("invalid JSON: %w", err)) diff --git a/internal/pkg/service/stream/mapping/table/column/renderer.go b/internal/pkg/service/stream/mapping/table/column/renderer.go index 479f034268..5ff426c5e5 100644 --- a/internal/pkg/service/stream/mapping/table/column/renderer.go +++ b/internal/pkg/service/stream/mapping/table/column/renderer.go @@ -1,24 +1,31 @@ package column import ( + "fmt" + "strconv" "strings" "github.com/gofrs/uuid/v5" + "github.com/keboola/go-utils/pkg/orderedmap" + "github.com/valyala/fastjson" "github.com/keboola/keboola-as-code/internal/pkg/encoding/json" jsonnetWrapper "github.com/keboola/keboola-as-code/internal/pkg/encoding/jsonnet" "github.com/keboola/keboola-as-code/internal/pkg/service/stream/mapping/jsonnet" "github.com/keboola/keboola-as-code/internal/pkg/service/stream/mapping/recordctx" "github.com/keboola/keboola-as-code/internal/pkg/utils/errors" + "github.com/keboola/keboola-as-code/internal/pkg/utils/httputils" ) type Renderer struct { - jsonnetPool *jsonnetWrapper.VMPool[recordctx.Context] + jsonnetPool *jsonnetWrapper.VMPool[recordctx.Context] + fastjsonPool *fastjson.ParserPool } func NewRenderer() *Renderer { return &Renderer{ - jsonnetPool: jsonnet.NewPool(), + jsonnetPool: jsonnet.NewPool(), + fastjsonPool: &fastjson.ParserPool{}, } } @@ -41,33 +48,13 @@ func (r *Renderer) CSVValue(c Column, ctx recordctx.Context) (string, error) { case IP: return ctx.ClientIP().String(), nil case Path: - bodyMap, err := ctx.BodyMap() - if err != nil { - return "", err - } + contentType, ok := ctx.HeadersMap().GetOrNil("Content-Type").(string) - var value any - - if c.Path == "" { - value = bodyMap - } else { - value, _, err = bodyMap.GetNested(c.Path) - if err != nil { - if c.DefaultValue != nil { - value = *c.DefaultValue - } else { - return "", errors.Wrapf(err, `path "%s" not found in the body`, c.Path) - } - } + if ok && httputils.IsContentTypeJSON(contentType) { + return r.jsonPathCSVValue(c, ctx) } - if c.RawString { - if stringValue, ok := value.(string); ok { - return stringValue, nil - } - } - - return json.EncodeString(value, false) + return r.mapPathCSVValue(c, ctx) case Template: if c.Template.Language != TemplateLanguageJsonnet { return "", errors.Errorf(`unsupported language "%s", only "jsonnet" is supported`, c.Template.Language) @@ -86,3 +73,87 @@ func (r *Renderer) CSVValue(c Column, ctx recordctx.Context) (string, error) { return "", errors.Errorf("unknown column type %T", c) } + +func (r *Renderer) mapPathCSVValue(c Path, ctx recordctx.Context) (string, error) { + bodyMap, err := ctx.BodyMap() + if err != nil { + return "", err + } + + var value any + + if c.Path == "" { + value = bodyMap + } else { + value, _, err = bodyMap.GetNested(c.Path) + if err != nil { + if c.DefaultValue != nil { + value = *c.DefaultValue + } else { + return "", errors.Wrapf(err, `path "%s" not found in the body`, c.Path) + } + } + } + + if c.RawString { + if stringValue, ok := value.(string); ok { + return stringValue, nil + } + } + + return json.EncodeString(value, false) +} + +func (r *Renderer) jsonPathCSVValue(c Path, ctx recordctx.Context) (string, error) { + // Get fastjson.Value, needs to be cached in recordctx as it might be used in multiple columns + value, err := ctx.JSONValue(r.fastjsonPool) + if err != nil { + return "", err + } + + var resultErr error + + path := orderedmap.PathFromStr(c.Path) + if len(path) > 0 { + // Transform orderedmap.Path to a slice of keys used by fastjson + keys := []string{} + for _, step := range path { + var key string + + switch step := step.(type) { + case orderedmap.MapStep: + key = step.Key() + case orderedmap.SliceStep: + key = strconv.Itoa(step.Index()) + } + + keys = append(keys, key) + } + + // Fetch the desired value from the json + if value.Exists(keys...) { + value = value.Get(keys...) + } else { + resultErr = errors.New(fmt.Sprintf(`path "%s" not found in the body`, c.Path)) + } + } + + if resultErr == nil { + // Return unquoted string if the value is a string and RawString is set to true. + if c.RawString && value.Type() == fastjson.TypeString { + return string(value.GetStringBytes()), nil + } + + // Return the found value (json encoded) + return value.String(), nil + } else if c.DefaultValue != nil { + // An error happened while processing the path, but we have a DefaultValue to use. + if c.RawString { + return *c.DefaultValue, nil + } + + return json.EncodeString(*c.DefaultValue, false) + } + + return "", resultErr +} diff --git a/internal/pkg/service/stream/mapping/table/column/renderer_benchmark_test.go b/internal/pkg/service/stream/mapping/table/column/renderer_benchmark_test.go index a85152e220..e6a0acbac0 100644 --- a/internal/pkg/service/stream/mapping/table/column/renderer_benchmark_test.go +++ b/internal/pkg/service/stream/mapping/table/column/renderer_benchmark_test.go @@ -20,10 +20,11 @@ func BenchmarkColumn_Path(b *testing.B) { body := `{"key1":[{"key2":"val2"},{"key3":"val3"}]}` header := http.Header{"Content-Type": []string{"application/json"}} - reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))}) renderer := column.NewRenderer() for i := 0; i < b.N; i++ { + // reqCtx needs to be created separately for each request, otherwise the parsed json is cached + reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))}) val, err := renderer.CSVValue(c, reqCtx) assert.NoError(b, err) assert.Equal(b, `"val3"`, val) @@ -38,10 +39,10 @@ func BenchmarkColumn_Template_Jsonnet(b *testing.B) { body := `{"key1":[{"key2":"val2"},{"key3":"val3"}]}` header := http.Header{"Content-Type": []string{"application/json"}} - reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))}) renderer := column.NewRenderer() for i := 0; i < b.N; i++ { + reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))}) val, err := renderer.CSVValue(c, reqCtx) assert.NoError(b, err) assert.Equal(b, `"val3"`, val) @@ -51,10 +52,10 @@ func BenchmarkColumn_Template_Jsonnet(b *testing.B) { func BenchmarkColumn_UUID(b *testing.B) { c := column.UUID{} - reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{}) renderer := column.NewRenderer() for i := 0; i < b.N; i++ { + reqCtx := recordctx.FromHTTP(time.Now(), &http.Request{}) val, err := renderer.CSVValue(c, reqCtx) assert.NoError(b, err) assert.Len(b, val, 36) diff --git a/internal/pkg/service/stream/mapping/table/column/renderer_test.go b/internal/pkg/service/stream/mapping/table/column/renderer_test.go index 420eababc5..fc5ee96579 100644 --- a/internal/pkg/service/stream/mapping/table/column/renderer_test.go +++ b/internal/pkg/service/stream/mapping/table/column/renderer_test.go @@ -9,6 +9,7 @@ import ( "github.com/gofrs/uuid/v5" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/keboola/keboola-as-code/internal/pkg/service/common/ptr" "github.com/keboola/keboola-as-code/internal/pkg/service/stream/mapping/recordctx" @@ -92,6 +93,38 @@ func TestRenderer_Path_Json_Scalar(t *testing.T) { assert.Equal(t, `"val2"`, val) } +func TestRenderer_Path_Json_Integer(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key1.key2", + } + + body := `{"key1":{"key2":42},"key3":"val3"}` + header := http.Header{"Content-Type": []string{"application/json"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `42`, val) +} + +func TestRenderer_Path_Json_Float(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key1.key2", + } + + body := `{"key1":{"key2":42.1},"key3":"val3"}` + header := http.Header{"Content-Type": []string{"application/json"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `42.1`, val) +} + func TestRenderer_Path_Json_Object(t *testing.T) { t.Parallel() @@ -183,7 +216,7 @@ func TestRenderer_Path_Json_UndefinedKey_Error(t *testing.T) { header := http.Header{"Content-Type": []string{"application/json"}} _, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) - assert.Error(t, err) + require.Error(t, err) assert.Equal(t, `path "key1.invalid" not found in the body`, err.Error()) } @@ -199,7 +232,7 @@ func TestRenderer_Path_Json_UndefinedIndex_Error(t *testing.T) { header := http.Header{"Content-Type": []string{"application/json"}} _, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) - assert.Error(t, err) + require.Error(t, err) assert.Equal(t, `path "key1[1]" not found in the body`, err.Error()) } @@ -266,7 +299,155 @@ func TestRenderer_Path_FormData_Full(t *testing.T) { val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) assert.NoError(t, err) - assert.Equal(t, `{"key1":"bar1","key2[]":["bar2","bar3"]}`, val) + assert.Equal(t, `{"key1":"bar1","key2":["bar2","bar3"]}`, val) +} + +func TestRenderer_Path_FormData_Scalar(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key1", + } + + body := `key1=bar1&key2[]=bar2&key2[]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `"bar1"`, val) +} + +func TestRenderer_Path_FormData_RawString(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key1", + RawString: true, + } + + body := `key1=42` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `42`, val) +} + +func TestRenderer_Path_FormData_Object(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key", + } + + body := `key[x]=bar2&key[y]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `{"x":"bar2","y":"bar3"}`, val) +} + +func TestRenderer_Path_FormData_ArrayOfObjects(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key", + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `[{"x":"bar2"},{"x":"bar3"}]`, val) +} + +func TestRenderer_Path_FormData_ArrayIndex(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key[1]", + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `{"x":"bar3"}`, val) +} + +func TestRenderer_Path_FormData_UndefinedKey_Error(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key2", + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + _, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + require.Error(t, err) + assert.Equal(t, `path "key2" not found in the body`, err.Error()) +} + +func TestRenderer_Path_FormData_UndefinedIndex_Error(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key[2]", + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + _, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + require.Error(t, err) + assert.Equal(t, `path "key[2]" not found in the body`, err.Error()) +} + +func TestRenderer_Path_FormData_UndefinedKey_DefaultValue(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key2", + DefaultValue: ptr.Ptr("123"), + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `"123"`, val) +} + +func TestRenderer_Path_FormData_UndefinedKey_DefaultValue_RawString(t *testing.T) { + t.Parallel() + + renderer := column.NewRenderer() + c := column.Path{ + Path: "key2", + DefaultValue: ptr.Ptr("123"), + RawString: true, + } + + body := `key[0][x]=bar2&key[1][x]=bar3` + header := http.Header{"Content-Type": []string{"application/x-www-form-urlencoded"}} + + val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) + assert.NoError(t, err) + assert.Equal(t, `123`, val) } func TestRenderer_Template_Json_Scalar(t *testing.T) { @@ -367,7 +548,7 @@ func TestRenderer_Template_Json_UndefinedKey_Error(t *testing.T) { header := http.Header{"Content-Type": []string{"application/json"}} _, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) - assert.Error(t, err) + require.Error(t, err) assert.Equal(t, `path "key1.invalid" not found in the body`, err.Error()) } @@ -402,7 +583,7 @@ func TestRenderer_Template_FormData_Full(t *testing.T) { val, err := renderer.CSVValue(c, recordctx.FromHTTP(time.Now(), &http.Request{Header: header, Body: io.NopCloser(strings.NewReader(body))})) assert.NoError(t, err) - assert.Equal(t, `{"key1":"bar1","key2[]":["bar2","bar3"]}`, val) + assert.Equal(t, `{"key1":"bar1","key2":["bar2","bar3"]}`, val) } func TestRenderer_Template_Headers(t *testing.T) { diff --git a/internal/pkg/utils/httputils/httputils.go b/internal/pkg/utils/httputils/httputils.go new file mode 100644 index 0000000000..414f4b49a3 --- /dev/null +++ b/internal/pkg/utils/httputils/httputils.go @@ -0,0 +1,15 @@ +package httputils + +import ( + "strings" + + "github.com/umisama/go-regexpcache" +) + +func IsContentTypeJSON(t string) bool { + return regexpcache.MustCompile(`^application/([a-zA-Z0-9\.\-]+\+)?json$`).MatchString(t) +} + +func IsContentTypeForm(t string) bool { + return strings.HasPrefix(t, "application/x-www-form-urlencoded") +} diff --git a/internal/pkg/utils/url/url.go b/internal/pkg/utils/url/url.go index f1fe07c4b3..c3b107854c 100644 --- a/internal/pkg/utils/url/url.go +++ b/internal/pkg/utils/url/url.go @@ -2,6 +2,7 @@ package url import ( "net/url" + "strconv" "strings" "github.com/keboola/go-utils/pkg/orderedmap" @@ -10,6 +11,7 @@ import ( ) // ParseQuery is taken from net/url package but returns ordered map instead of regular map. +// It is also adjusted to work better with nested keys. func ParseQuery(query string) (m *orderedmap.OrderedMap, err error) { m = orderedmap.New() for query != "" { @@ -23,30 +25,83 @@ func ParseQuery(query string) (m *orderedmap.OrderedMap, err error) { continue } key, value, _ := strings.Cut(key, "=") - key, err1 := url.QueryUnescape(key) - if err1 != nil { - if err == nil { - err = err1 + key, err = url.QueryUnescape(key) + if err != nil { + return m, err + } + value, err = url.QueryUnescape(value) + if err != nil { + return m, err + } + + path, sliceAppend, err := parseKey(key) + if err != nil { + return m, err + } + + if sliceAppend { + existingValue, found, _ := m.GetNestedPath(path) + if !found { + err = m.SetNestedPath(path, []any{value}) + if err != nil { + return m, err + } + } else { + if existingValueSlice, ok := existingValue.([]any); ok { + err = m.SetNestedPath(path, append(existingValueSlice, value)) + if err != nil { + return m, err + } + } else { + err = errors.Errorf("invalid square brackets in query") + return m, err + } } continue } - value, err1 = url.QueryUnescape(value) - if err1 != nil { - if err == nil { - err = err1 - } + + err = m.SetNestedPath(path, value) + if err != nil { + return m, err + } + } + return m, err +} + +// parseKey transforms a given url key string to orderedmap.Path. +// "key[subkey]" would give the same result as orderedmap.PathFromStr("key.subkey") +// "key[123]" would give the same result as orderedmap.PathFromStr("key[123]") +// [] at the end of the string will cause sliceAppend = true. +func parseKey(str string) (path orderedmap.Path, sliceAppend bool, err error) { + parts := strings.FieldsFunc(str, func(r rune) bool { + return r == '[' + }) + + for i, part := range parts { + if i == 0 { + path = append(path, orderedmap.MapStep(part)) continue } - existingValue, found := m.Get(key) - if found { - if existingValueSlice, ok := existingValue.([]any); ok { - m.Set(key, append(existingValueSlice, value)) + if part[len(part)-1] != ']' { + err = errors.Errorf(`Unable to parse key "%s"`, str) + return path, sliceAppend, err + } + part = part[:len(part)-1] + if part == "" { + if i == len(parts)-1 { + sliceAppend = true } else { - m.Set(key, []any{existingValue, value}) + err = errors.Errorf(`Unable to parse key "%s"`, str) } + return path, sliceAppend, err + } + + if v, err := strconv.Atoi(part); err == nil { + path = append(path, orderedmap.SliceStep(v)) } else { - m.Set(key, value) + path = append(path, orderedmap.MapStep(part)) } } - return m, err + + return path, sliceAppend, err } diff --git a/internal/pkg/utils/url/url_test.go b/internal/pkg/utils/url/url_test.go index faafb7d878..b525b53a34 100644 --- a/internal/pkg/utils/url/url_test.go +++ b/internal/pkg/utils/url/url_test.go @@ -10,7 +10,7 @@ import ( func TestParseQuery(t *testing.T) { t.Parallel() - res, err := ParseQuery("one=two&three=four&five=&six&seven[]=eight&seven[]=nine") + res, err := ParseQuery("one=two&three=four&five=&six&seven[0]=eight&seven[1]=nine&ten[]=eleven&ten[]=twelve") assert.NoError(t, err) exp := orderedmap.New() @@ -18,6 +18,51 @@ func TestParseQuery(t *testing.T) { exp.Set("three", "four") exp.Set("five", "") exp.Set("six", "") - exp.Set("seven[]", []any{"eight", "nine"}) + exp.Set("seven", []any{"eight", "nine"}) + exp.Set("ten", []any{"eleven", "twelve"}) + assert.Equal(t, exp, res) +} + +func TestParseQuery_Map(t *testing.T) { + t.Parallel() + + res, err := ParseQuery("one[two]=three&one[four]=five") + assert.NoError(t, err) + + exp := orderedmap.New() + exp.Set("one", orderedmap.FromPairs( + []orderedmap.Pair{ + { + Key: "two", + Value: "three", + }, + { + Key: "four", + Value: "five", + }, + }, + )) + assert.Equal(t, exp, res) +} + +func TestParseQuery_Nested(t *testing.T) { + t.Parallel() + + res, err := ParseQuery("k[x][0]=zero&k[x][2]=one&k[y][0]=two") + assert.NoError(t, err) + + exp := orderedmap.New() + exp.Set("k", orderedmap.FromPairs( + []orderedmap.Pair{ + { + Key: "x", + Value: []any{"zero", nil, "one"}, + }, + { + Key: "y", + Value: []any{"two"}, + }, + }, + )) assert.Equal(t, exp, res) }