Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/v/iceberg/table_metadata_json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,31 @@ null_order null_order_from_str(std::string_view s) {
.match(null_order_to_str(null_order::nulls_last), null_order::nulls_last);
}

nested_field::id_t
parse_source_id(const json::Value& v, std::string_view field_name) {
if (!v.IsInt()) {
throw std::invalid_argument(
fmt::format("Expected {} to be int: {}", field_name, v.GetType()));
}
return nested_field::id_t{v.GetInt()};
}

} // namespace

sort_field parse_sort_field(const json::Value& v) {
const auto& transform_str = parse_required_str(v, "transform");
chunked_vector<nested_field::id_t> source_ids;
for (const auto& id_json : parse_required_array(v, "source-ids")) {
if (!id_json.IsInt()) {
throw std::invalid_argument(
fmt::format(
"Expected source-ids to be ints: {}", id_json.GetType()));
// The spec allows for either a single "source-id" or an array of
// "source-ids" for backwards compatibility with older spec versions. Handle
// both cases.
if (
const auto id_json = parse_optional(v, "source-id");
id_json.has_value()) {
source_ids.emplace_back(parse_source_id(id_json->get(), "source-id"));
} else {
for (const auto& id : parse_required_array(v, "source-ids")) {
source_ids.emplace_back(parse_source_id(id, "source-ids"));
}
source_ids.emplace_back(nested_field::id_t{id_json.GetInt()});
}
auto direction = parse_required_str(v, "direction");
auto null_order = parse_required_str(v, "null-order");
Expand Down
31 changes: 31 additions & 0 deletions src/v/iceberg/tests/table_metadata_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,37 @@ TEST(TableMetadataJsonSerde, TestTableMetadataNoOptionals) {
ASSERT_EQ(roundtrip, parsed);
}

// Regression test: the Iceberg spec defines a sort field's column reference as
// a single "source-id" int. Some catalogs (and our own writer) emit the
// "source-ids" array form; the parser must accept both.
TEST(TableMetadataJsonSerde, TestParseSortFieldSourceIdSingular) {
const auto* const field_json = R"JSON({
"transform": "identity",
"source-id": 5,
"direction": "asc",
"null-order": "nulls-first"
})JSON";
json::Document doc;
doc.Parse(field_json);
const auto parsed = parse_sort_field(doc);
ASSERT_EQ(1, parsed.source_ids.size());
EXPECT_EQ(nested_field::id_t{5}, parsed.source_ids[0]);
EXPECT_EQ(sort_direction::asc, parsed.direction);
EXPECT_EQ(null_order::nulls_first, parsed.null_order);
}

TEST(TableMetadataJsonSerde, TestParseSortFieldSourceIdNotInt) {
const auto* const field_json = R"JSON({
"transform": "identity",
"source-id": "five",
"direction": "asc",
"null-order": "nulls-first"
})JSON";
json::Document doc;
doc.Parse(field_json);
EXPECT_THROW(parse_sort_field(doc), std::invalid_argument);
}

TEST(TableMetadataJsonSerde, TestSchemaLookup) {
const auto test_str = test_table_meta_json;
json::Document parsed_orig_json;
Expand Down
Loading