Skip to content

Commit 294a887

Browse files
authored
Test inferred schema for json array (#454)
Arrow's infer_schema internally uses an IndexMap to keep track of the inferred schema while updating it from the given JSON objects. Thus the order in which insertions into this map happen determines the output order of the schema fields as well. Since a JSON object in serde_json uses a BTreeMap, iteration is always ascending by key. This leads to infer_schema producing schema fields in ascending order when we infer from only one JSON object. In the case of a JSON array, any new fields encountered in later objects are appended at the end. Thus the output order of infer_schema did not provide a good enough guarantee. This is solved in #450. This commit just adds a test for it.
1 parent 0401cc5 commit 294a887

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

server/src/handlers/http/ingest.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,57 @@ mod tests {
354354
.is_err())
355355
}
356356

357+
#[test]
358+
fn array_into_recordbatch_inffered_schema() {
359+
let json = json!([
360+
{
361+
"b": "hello",
362+
},
363+
{
364+
"b": "hello",
365+
"a": 1,
366+
"c": 1
367+
},
368+
{
369+
"a": 1,
370+
"b": "hello",
371+
"c": null
372+
},
373+
]);
374+
375+
let req = TestRequest::default().to_http_request();
376+
377+
let (_, rb, _) = into_event_batch(
378+
req,
379+
Bytes::from(serde_json::to_vec(&json).unwrap()),
380+
HashMap::default(),
381+
)
382+
.unwrap();
383+
384+
assert_eq!(rb.num_rows(), 3);
385+
assert_eq!(rb.num_columns(), 6);
386+
387+
let schema = rb.schema();
388+
let fields = &schema.fields;
389+
390+
assert_eq!(&*fields[1], &Field::new("a", DataType::Int64, true));
391+
assert_eq!(&*fields[2], &Field::new("b", DataType::Utf8, true));
392+
assert_eq!(&*fields[3], &Field::new("c", DataType::Int64, true));
393+
394+
assert_eq!(
395+
rb.column_by_name("a").unwrap().as_int64_arr(),
396+
&Int64Array::from(vec![None, Some(1), Some(1)])
397+
);
398+
assert_eq!(
399+
rb.column_by_name("b").unwrap().as_utf8_arr(),
400+
&StringArray::from(vec![Some("hello"), Some("hello"), Some("hello"),])
401+
);
402+
assert_eq!(
403+
rb.column_by_name("c").unwrap().as_int64_arr(),
404+
&Int64Array::from(vec![None, Some(1), None])
405+
);
406+
}
407+
357408
#[test]
358409
fn arr_with_null_into_rb() {
359410
let json = json!([

0 commit comments

Comments
 (0)