Skip to content

Commit 8c2fd81

Browse files
committed
AVRO 4175: [C++] Allow previously parsed schemas to be referenced
1 parent 557891c commit 8c2fd81

File tree

3 files changed

+64
-1
lines changed

3 files changed

+64
-1
lines changed

lang/c++/impl/Compiler.cc

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,15 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
9494

9595
auto it = st.find(n);
9696
if (it != st.end()) {
97-
return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second));
97+
// Return the raw NodePtr instead of creating a new "NodeSymbolic"
98+
// via "NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second))"
99+
// in order to support externally resolved named references.
100+
// This is safe because the validator canonicalizes duplicates:
101+
// when it sees the same named node again (including self-recursion),
102+
// it replaces that leaf with a NodeSymbolic via "setLeafToSymbolic".
103+
// So even if the raw NodePtr is returned initially, validation
104+
// converts repeats to symbolic links.
105+
return it->second;
98106
}
99107
throw Exception("Unknown type: {}", n);
100108
}
@@ -638,4 +646,23 @@ AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, string &
638646
}
639647
}
640648

649+
/// Compiles the JSON schema read from \p is into a ValidSchema, with the
/// root node of every schema in \p namedReferences pre-registered in the
/// symbol table under its associated Name, so that the schema being compiled
/// can refer to those types by name.
///
/// \param is stream containing the JSON schema text.
/// \param namedReferences previously compiled schemas, keyed by the name
///        under which they are made available for lookup.
/// \return the ValidSchema built from the parsed root node.
/// \throws Exception if \p is is not in a good state on entry.
AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
                                                           const std::map<Name, ValidSchema> &namedReferences) {
    if (!is.good()) {
        throw Exception("Input stream is not good");
    }

    // Parse the whole JSON document before touching the symbol table.
    std::unique_ptr<InputStream> source = istreamInputStream(is);
    json::Entity parsed = json::loadEntity(*source);

    // The symbol table maps Name -> NodePtr, whereas callers hand us
    // Name -> ValidSchema, so register each schema's root node.
    SymbolTable symbols;
    for (auto ref = namedReferences.begin(); ref != namedReferences.end(); ++ref) {
        symbols[ref->first] = ref->second.root();
    }

    // An empty string is passed as the enclosing namespace for the root.
    NodePtr rootNode = makeNode(parsed, symbols, "");
    return ValidSchema(rootNode);
}
667+
641668
} // namespace avro

lang/c++/include/avro/Compiler.hh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "Config.hh"
2323
#include <cstdint>
2424
#include <istream>
25+
#include <map>
2526

2627
namespace avro {
2728

@@ -33,6 +34,7 @@ class AVRO_DECL InputStream;
3334
/// build up an avro parse tree as the avro spec is parsed.
3435

3536
class AVRO_DECL ValidSchema;
37+
class AVRO_DECL Name;
3638

3739
/// Given a stream containing a JSON schema, compiles the schema to a
3840
/// ValidSchema object. Throws if the schema cannot be compiled to a valid
@@ -58,6 +60,9 @@ AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string &input);
5860

5961
AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char *filename);
6062

63+
/// Given a stream containing a JSON schema, compiles the schema to a
/// ValidSchema object. The root of every schema in namedReferences is made
/// available for lookup under its associated Name while the new schema is
/// compiled, so the schema may refer to those types by name. Throws if the
/// stream is not in a good state when the function is called.
AVRO_DECL ValidSchema compileJsonSchemaWithNamedReferences(std::istream &is,
64+
const std::map<Name, ValidSchema> &namedReferences);
65+
6166
} // namespace avro
6267

6368
#endif

lang/c++/test/CompilerTests.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <boost/test/unit_test.hpp>
2323

2424
#include "Compiler.hh"
25+
#include "Node.hh"
2526
#include "ValidSchema.hh"
2627

2728
// Assert that empty defaults don't make json schema compilation violate bounds
@@ -82,12 +83,42 @@ void test2dArray() {
8283
BOOST_CHECK_EQUAL(expected, actual.str());
8384
}
8485

86+
void testRecordWithNamedReference() {
87+
std::string nestedSchema = "{\"name\":\"NestedRecord\",\"type\":\"record\",\"fields\":[{\"name\":\"stringField\",\"type\":\"string\"}]}";
88+
// The root schema references the nested schema above by name only.
89+
// This mimics tools that allow schemas to have references to other schemas.
90+
std::string rootSchema = "{\"name\":\"RootRecord\",\"type\":\"record\",\"fields\":[{\"name\": \"nestedField\",\"type\":\"NestedRecord\"}]}";
91+
92+
// First compile the nested schema
93+
avro::ValidSchema nestedRecord = avro::compileJsonSchemaFromString(nestedSchema);
94+
95+
// Create a map of named references
96+
std::map<avro::Name, avro::ValidSchema> namedReferences;
97+
namedReferences[avro::Name("NestedRecord")] = nestedRecord;
98+
99+
// Parse the root schema with named references
100+
std::istringstream rootSchemaStream(rootSchema);
101+
avro::ValidSchema rootRecord = avro::compileJsonSchemaWithNamedReferences(rootSchemaStream, namedReferences);
102+
103+
// Verify the schema was compiled correctly
104+
BOOST_CHECK_EQUAL("RootRecord", rootRecord.root()->name().simpleName());
105+
106+
// Get the nested field and verify its type
107+
const avro::NodePtr &rootNode = rootRecord.root();
108+
BOOST_CHECK_EQUAL(avro::AVRO_RECORD, rootNode->type());
109+
BOOST_CHECK_EQUAL(1, rootNode->leaves());
110+
111+
const avro::NodePtr &nestedFieldNode = rootNode->leafAt(0);
112+
BOOST_CHECK_EQUAL("NestedRecord", nestedFieldNode->name().simpleName());
113+
}
114+
85115
// Builds the Boost.Test suite for this file and registers each test case
// defined above with it. The command-line arguments are unused.
boost::unit_test::test_suite *
init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
    using namespace boost::unit_test;

    test_suite *const suite = BOOST_TEST_SUITE("Avro C++ unit tests for Compiler.cc");

    // Test cases are added in the order they should be registered.
    suite->add(BOOST_TEST_CASE(&testEmptyBytesDefault));
    suite->add(BOOST_TEST_CASE(&test2dArray));
    suite->add(BOOST_TEST_CASE(&testRecordWithNamedReference));

    return suite;
}

0 commit comments

Comments
 (0)