diff --git a/src/v/iceberg/table_metadata_json.cc b/src/v/iceberg/table_metadata_json.cc index 3216f0d53efef..990a8fb521130 100644 --- a/src/v/iceberg/table_metadata_json.cc +++ b/src/v/iceberg/table_metadata_json.cc @@ -73,18 +73,31 @@ null_order null_order_from_str(std::string_view s) { .match(null_order_to_str(null_order::nulls_last), null_order::nulls_last); } +nested_field::id_t +parse_source_id(const json::Value& v, std::string_view field_name) { + if (!v.IsInt()) { + throw std::invalid_argument( + fmt::format("Expected {} to be int: {}", field_name, v.GetType())); + } + return nested_field::id_t{v.GetInt()}; +} + } // namespace sort_field parse_sort_field(const json::Value& v) { const auto& transform_str = parse_required_str(v, "transform"); chunked_vector source_ids; - for (const auto& id_json : parse_required_array(v, "source-ids")) { - if (!id_json.IsInt()) { - throw std::invalid_argument( - fmt::format( - "Expected source-ids to be ints: {}", id_json.GetType())); + // The spec allows for either a single "source-id" or an array of + // "source-ids" for backwards compatibility with older spec versions. Handle + // both cases. + if ( + const auto id_json = parse_optional(v, "source-id"); + id_json.has_value()) { + source_ids.emplace_back(parse_source_id(id_json->get(), "source-id")); + } else { + for (const auto& id : parse_required_array(v, "source-ids")) { + source_ids.emplace_back(parse_source_id(id, "source-ids")); } - source_ids.emplace_back(nested_field::id_t{id_json.GetInt()}); } auto direction = parse_required_str(v, "direction"); auto null_order = parse_required_str(v, "null-order"); diff --git a/src/v/iceberg/tests/table_metadata_json_test.cc b/src/v/iceberg/tests/table_metadata_json_test.cc index 588ebfdf86320..99168c7d5292a 100644 --- a/src/v/iceberg/tests/table_metadata_json_test.cc +++ b/src/v/iceberg/tests/table_metadata_json_test.cc @@ -238,6 +238,37 @@ TEST(TableMetadataJsonSerde, TestTableMetadataNoOptionals) { ASSERT_EQ(roundtrip, parsed); } +// Regression test: the Iceberg spec defines a sort field's column reference as +// a single "source-id" int. Some catalogs (and our own writer) emit the +// "source-ids" array form; the parser must accept both. +TEST(TableMetadataJsonSerde, TestParseSortFieldSourceIdSingular) { + const auto* const field_json = R"JSON({ + "transform": "identity", + "source-id": 5, + "direction": "asc", + "null-order": "nulls-first" + })JSON"; + json::Document doc; + doc.Parse(field_json); + const auto parsed = parse_sort_field(doc); + ASSERT_EQ(1, parsed.source_ids.size()); + EXPECT_EQ(nested_field::id_t{5}, parsed.source_ids[0]); + EXPECT_EQ(sort_direction::asc, parsed.direction); + EXPECT_EQ(null_order::nulls_first, parsed.null_order); +} + +TEST(TableMetadataJsonSerde, TestParseSortFieldSourceIdNotInt) { + const auto* const field_json = R"JSON({ + "transform": "identity", + "source-id": "five", + "direction": "asc", + "null-order": "nulls-first" + })JSON"; + json::Document doc; + doc.Parse(field_json); + EXPECT_THROW(parse_sort_field(doc), std::invalid_argument); +} + TEST(TableMetadataJsonSerde, TestSchemaLookup) { const auto test_str = test_table_meta_json; json::Document parsed_orig_json;