Managed I/O for Apache Iceberg supports dynamic destinations. Instead of writing to a single fixed table, the connector can dynamically select a destination table based on field values within the incoming records.
To use dynamic destinations, provide a template for the table configuration parameter, as in the sketch below. For more information, see
Dynamic destinations.
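For example, the following minimal sketch builds a configuration map whose table value is a template: the {airport} token is resolved from the airport field of each incoming record, so different records can land in different tables. The catalog name and warehouse path shown here are placeholder values, and the template string mirrors the dynamic-destinations example later on this page; the map would be passed to Managed.write(Managed.ICEBERG).withConfig(config) exactly as in the full examples that follow.

import com.google.common.collect.ImmutableMap;
import java.util.Map;

public class DynamicDestinationConfigSketch {
  public static void main(String[] args) {
    // Catalog properties. The warehouse path is a placeholder value.
    Map<String, Object> catalogConfig = ImmutableMap.<String, Object>builder()
        .put("warehouse", "gs://example-bucket/warehouse")
        .put("type", "hadoop")
        .build();

    // The "table" value is a template. At write time, {airport} is replaced with the
    // value of the "airport" field in each record, which selects the destination table.
    Map<String, Object> config = ImmutableMap.<String, Object>builder()
        .put("catalog_name", "example-catalog")
        .put("catalog_properties", catalogConfig)
        .put("table", "flights-{airport}")
        .build();

    System.out.println(config);
  }
}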
Examples
The following examples show how to use Managed I/O to write to
Apache Iceberg.
Write to an Apache Iceberg table
The following example writes in-memory JSON data to an Apache Iceberg table.
import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.managed.Managed;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.JsonToRow;

public class ApacheIcebergWrite {
  static final List<String> TABLE_ROWS = Arrays.asList(
      "{\"id\":0, \"name\":\"Alice\"}",
      "{\"id\":1, \"name\":\"Bob\"}",
      "{\"id\":2, \"name\":\"Charles\"}"
  );

  static final String CATALOG_TYPE = "hadoop";

  // The schema for the table rows.
  public static final Schema SCHEMA = new Schema.Builder()
      .addStringField("name")
      .addInt64Field("id")
      .build();

  public interface Options extends PipelineOptions {
    @Description("The URI of the Apache Iceberg warehouse location")
    String getWarehouseLocation();

    void setWarehouseLocation(String value);

    @Description("The name of the Apache Iceberg catalog")
    String getCatalogName();

    void setCatalogName(String value);

    @Description("The name of the table to write to")
    String getTableName();

    void setTableName(String value);
  }

  public static void main(String[] args) {

    // Parse the pipeline options passed into the application. Example:
    //   --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \
    //   --tableName=$TABLE_NAME
    // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    // Configure the Iceberg catalog.
    Map catalogConfig = ImmutableMap.<String, Object>builder()
        .put("warehouse", options.getWarehouseLocation())
        .put("type", CATALOG_TYPE)
        .build();

    ImmutableMap<String, Object> config = ImmutableMap.<String, Object>builder()
        .put("table", options.getTableName())
        .put("catalog_name", options.getCatalogName())
        .put("catalog_properties", catalogConfig)
        .build();

    // Build the pipeline.
    pipeline.apply(Create.of(TABLE_ROWS))
        .apply(JsonToRow.withSchema(SCHEMA))
        .apply(Managed.write(Managed.ICEBERG).withConfig(config));

    pipeline.run().waitUntilFinish();
  }
}
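In this example, the pipeline converts the three in-memory JSON strings into Beam Row objects that match SCHEMA and writes them to the single Iceberg table named by the tableName pipeline option, using a Hadoop catalog rooted at the warehouse location passed on the command line.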
Write with dynamic destinations
The following example writes to different Apache Iceberg tables based on a
field in the input data.
import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.managed.Managed;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.JsonToRow;

public class ApacheIcebergDynamicDestinations {

  // The schema for the table rows.
  public static final Schema SCHEMA = new Schema.Builder()
      .addInt64Field("id")
      .addStringField("name")
      .addStringField("airport")
      .build();

  // The data to write to the table, formatted as JSON strings.
  static final List<String> TABLE_ROWS = List.of(
      "{\"id\":0, \"name\":\"Alice\", \"airport\": \"ORD\" }",
      "{\"id\":1, \"name\":\"Bob\", \"airport\": \"SYD\" }",
      "{\"id\":2, \"name\":\"Charles\", \"airport\": \"ORD\" }"
  );

  public interface Options extends PipelineOptions {
    @Description("The URI of the Apache Iceberg warehouse location")
    String getWarehouseLocation();

    void setWarehouseLocation(String value);

    @Description("The name of the Apache Iceberg catalog")
    String getCatalogName();

    void setCatalogName(String value);
  }

  // Write JSON data to Apache Iceberg, using dynamic destinations to determine the Iceberg table
  // where Dataflow writes each record. The JSON data contains a field named "airport". The
  // Dataflow pipeline writes to Iceberg tables with the naming pattern "flights-{airport}".
  public static void main(String[] args) {
    // Parse the pipeline options passed into the application. Example:
    //   --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG
    // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    Pipeline pipeline = Pipeline.create(options);

    // Configure the Iceberg catalog.
    Map catalogConfig = ImmutableMap.<String, Object>builder()
        .put("warehouse", options.getWarehouseLocation())
        .put("type", "hadoop")
        .build();

    ImmutableMap<String, Object> config = ImmutableMap.<String, Object>builder()
        .put("catalog_name", options.getCatalogName())
        .put("catalog_properties", catalogConfig)
        // Route the incoming records based on the value of the "airport" field.
        .put("table", "flights-{airport}")
        // Specify which fields to keep from the input data.
        .put("keep", Arrays.asList("name", "id"))
        .build();

    // Build the pipeline.
    pipeline
        // Read in-memory JSON data.
        .apply(Create.of(TABLE_ROWS))
        // Convert the JSON records to Row objects.
        .apply(JsonToRow.withSchema(SCHEMA))
        // Write each Row to Apache Iceberg.
        .apply(Managed.write(Managed.ICEBERG).withConfig(config));

    // Run the pipeline.
    pipeline.run().waitUntilFinish();
  }
}
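With the sample input above, the rows for Alice and Charles are routed to a table named flights-ORD and the row for Bob is routed to flights-SYD. Because the keep option lists only name and id, the airport field determines the destination table but is not written to it.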
[[["Fácil de entender","easyToUnderstand","thumb-up"],["Meu problema foi resolvido","solvedMyProblem","thumb-up"],["Outro","otherUp","thumb-up"]],[["Difícil de entender","hardToUnderstand","thumb-down"],["Informações incorretas ou exemplo de código","incorrectInformationOrSampleCode","thumb-down"],["Não contém as informações/amostras de que eu preciso","missingTheInformationSamplesINeed","thumb-down"],["Problema na tradução","translationIssue","thumb-down"],["Outro","otherDown","thumb-down"]],["Última atualização 2025-08-18 UTC."],[[["\u003cp\u003eManaged I/O connector enables writing from Dataflow to Apache Iceberg, supporting batch and streaming writes, as well as dynamic destinations and dynamic table creation.\u003c/p\u003e\n"],["\u003cp\u003eSupported catalogs for Apache Iceberg include Hadoop, Hive, REST-based catalogs, and BigQuery metastore, with specific dependencies or version requirements for some.\u003c/p\u003e\n"],["\u003cp\u003eThe \u003ccode\u003eBigQueryIO\u003c/code\u003e connector with BigQuery Storage API should be used for BigQuery tables for Apache Iceberg, noting that these tables must already exist, as dynamic table creation is not supported.\u003c/p\u003e\n"],["\u003cp\u003eConfiguration for Managed I/O to write to Apache Iceberg involves specifying parameters like \u003ccode\u003etable\u003c/code\u003e, \u003ccode\u003ecatalog_name\u003c/code\u003e, \u003ccode\u003ecatalog_properties\u003c/code\u003e, \u003ccode\u003econfig_properties\u003c/code\u003e, and \u003ccode\u003etriggering_frequency_seconds\u003c/code\u003e.\u003c/p\u003e\n"],["\u003cp\u003eManaged I/O for Apache Iceberg supports writing to multiple tables dynamically based on field values within incoming records, demonstrated through examples that showcase both standard and dynamic table writing processes.\u003c/p\u003e\n"]]],[],null,["# Write from Dataflow to Apache Iceberg\n\nTo write from Dataflow to Apache Iceberg, use the\n[managed I/O connector](/dataflow/docs/guides/managed-io-iceberg).\n\nManaged I/O supports the following capabilities for Apache Iceberg:\n\nFor [BigQuery tables for Apache Iceberg](/bigquery/docs/iceberg-tables),\nuse the\n[`BigQueryIO` connector](https://beam.apache.org/documentation/io/built-in/google-bigquery/)\nwith BigQuery Storage API. The table must already exist; dynamic table creation is\nnot supported.\n\nDependencies\n------------\n\nAdd the following dependencies to your project: \n\n### Java\n\n \u003cdependency\u003e\n \u003cgroupId\u003eorg.apache.beam\u003c/groupId\u003e\n \u003cartifactId\u003ebeam-sdks-java-managed\u003c/artifactId\u003e\n \u003cversion\u003e${beam.version}\u003c/version\u003e\n \u003c/dependency\u003e\n\n \u003cdependency\u003e\n \u003cgroupId\u003eorg.apache.beam\u003c/groupId\u003e\n \u003cartifactId\u003ebeam-sdks-java-io-iceberg\u003c/artifactId\u003e\n \u003cversion\u003e${beam.version}\u003c/version\u003e\n \u003c/dependency\u003e\n\nDynamic destinations\n--------------------\n\nManaged I/O for Apache Iceberg supports dynamic destinations. Instead of\nwriting to a single fixed table, the connector can dynamically select a\ndestination table based on field values within the incoming records.\n\nTo use dynamic destinations, provide a template for the `table` configuration\nparameter. 
For more information, see\n[Dynamic destinations](/dataflow/docs/guides/managed-io#dynamic-destinations).\n\nExamples\n--------\n\nThe following examples show how to use Managed I/O to write to\nApache Iceberg.\n\n### Write to an Apache Iceberg table\n\nThe following example writes in-memory JSON data to an Apache Iceberg table. \n\n### Java\n\n\nTo authenticate to Dataflow, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import com.google.common.collect.ImmutableMap;\n import java.util.Arrays;\n import java.util.List;\n import java.util.Map;\n import org.apache.beam.sdk.Pipeline;\n import org.apache.beam.sdk.managed.Managed;\n import org.apache.beam.sdk.options.Description;\n import org.apache.beam.sdk.options.PipelineOptions;\n import org.apache.beam.sdk.options.PipelineOptionsFactory;\n import org.apache.beam.sdk.schemas.Schema;\n import org.apache.beam.sdk.transforms.Create;\n import org.apache.beam.sdk.transforms.JsonToRow;\n import org.apache.beam.sdk.values.PCollectionRowTuple;\n\n public class ApacheIcebergWrite {\n static final List\u003cString\u003e TABLE_ROWS = Arrays.asList(\n \"{\\\"id\\\":0, \\\"name\\\":\\\"Alice\\\"}\",\n \"{\\\"id\\\":1, \\\"name\\\":\\\"Bob\\\"}\",\n \"{\\\"id\\\":2, \\\"name\\\":\\\"Charles\\\"}\"\n );\n\n static final String CATALOG_TYPE = \"hadoop\";\n\n // The schema for the table rows.\n public static final Schema SCHEMA = new Schema.Builder()\n .addStringField(\"name\")\n .addInt64Field(\"id\")\n .build();\n\n public interface Options extends PipelineOptions {\n @Description(\"The URI of the Apache Iceberg warehouse location\")\n String getWarehouseLocation();\n\n void setWarehouseLocation(String value);\n\n @Description(\"The name of the Apache Iceberg catalog\")\n String getCatalogName();\n\n void setCatalogName(String value);\n\n @Description(\"The name of the table to write to\")\n String getTableName();\n\n void setTableName(String value);\n }\n\n public static void main(String[] args) {\n\n // Parse the pipeline options passed into the application. Example:\n // --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \\\n // --tableName= $TABLE_NAME\n // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options\n Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);\n Pipeline pipeline = Pipeline.create(options);\n\n // Configure the Iceberg source I/O\n Map catalogConfig = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"warehouse\", options.getWarehouseLocation())\n .put(\"type\", CATALOG_TYPE)\n .build();\n\n ImmutableMap\u003cString, Object\u003e config = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"table\", options.getTableName())\n .put(\"catalog_name\", options.getCatalogName())\n .put(\"catalog_properties\", catalogConfig)\n .build();\n\n // Build the pipeline.\n pipeline.apply(Create.of(TABLE_ROWS))\n .apply(JsonToRow.withSchema(SCHEMA))\n .apply(Managed.write(Managed.ICEBERG).withConfig(config));\n\n pipeline.run().waitUntilFinish();\n }\n }\n\n\u003cbr /\u003e\n\n### Write with dynamic destinations\n\nThe following example writes to different Apache Iceberg tables based on a\nfield in the input data. 
\n\n### Java\n\n\nTo authenticate to Dataflow, set up Application Default Credentials.\nFor more information, see\n\n[Set up authentication for a local development environment](/docs/authentication/set-up-adc-local-dev-environment).\n\n import com.google.common.collect.ImmutableMap;\n import java.util.Arrays;\n import java.util.List;\n import java.util.Map;\n import org.apache.beam.sdk.Pipeline;\n import org.apache.beam.sdk.PipelineResult;\n import org.apache.beam.sdk.managed.Managed;\n import org.apache.beam.sdk.options.Description;\n import org.apache.beam.sdk.options.PipelineOptions;\n import org.apache.beam.sdk.options.PipelineOptionsFactory;\n import org.apache.beam.sdk.schemas.Schema;\n import org.apache.beam.sdk.transforms.Create;\n import org.apache.beam.sdk.transforms.JsonToRow;\n\n public class ApacheIcebergDynamicDestinations {\n\n // The schema for the table rows.\n public static final Schema SCHEMA = new Schema.Builder()\n .addInt64Field(\"id\")\n .addStringField(\"name\")\n .addStringField(\"airport\")\n .build();\n\n // The data to write to table, formatted as JSON strings.\n static final List\u003cString\u003e TABLE_ROWS = List.of(\n \"{\\\"id\\\":0, \\\"name\\\":\\\"Alice\\\", \\\"airport\\\": \\\"ORD\\\" }\",\n \"{\\\"id\\\":1, \\\"name\\\":\\\"Bob\\\", \\\"airport\\\": \\\"SYD\\\" }\",\n \"{\\\"id\\\":2, \\\"name\\\":\\\"Charles\\\", \\\"airport\\\": \\\"ORD\\\" }\"\n );\n\n public interface Options extends PipelineOptions {\n @Description(\"The URI of the Apache Iceberg warehouse location\")\n String getWarehouseLocation();\n\n void setWarehouseLocation(String value);\n\n @Description(\"The name of the Apache Iceberg catalog\")\n String getCatalogName();\n\n void setCatalogName(String value);\n }\n\n // Write JSON data to Apache Iceberg, using dynamic destinations to determine the Iceberg table\n // where Dataflow writes each record. The JSON data contains a field named \"airport\". The\n // Dataflow pipeline writes to Iceberg tables with the naming pattern \"flights-{airport}\".\n public static void main(String[] args) {\n // Parse the pipeline options passed into the application. 
Example:\n // --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \\\n // For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options\n Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);\n Pipeline pipeline = Pipeline.create(options);\n\n // Configure the Iceberg source I/O\n Map catalogConfig = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"warehouse\", options.getWarehouseLocation())\n .put(\"type\", \"hadoop\")\n .build();\n\n ImmutableMap\u003cString, Object\u003e config = ImmutableMap.\u003cString, Object\u003ebuilder()\n .put(\"catalog_name\", options.getCatalogName())\n .put(\"catalog_properties\", catalogConfig)\n // Route the incoming records based on the value of the \"airport\" field.\n .put(\"table\", \"flights-{airport}\")\n // Specify which fields to keep from the input data.\n .put(\"keep\", Arrays.asList(\"name\", \"id\"))\n .build();\n\n // Build the pipeline.\n pipeline\n // Read in-memory JSON data.\n .apply(Create.of(TABLE_ROWS))\n // Convert the JSON records to Row objects.\n .apply(JsonToRow.withSchema(SCHEMA))\n // Write each Row to Apache Iceberg.\n .apply(Managed.write(Managed.ICEBERG).withConfig(config));\n\n // Run the pipeline.\n pipeline.run().waitUntilFinish();\n }\n }\n\n\u003cbr /\u003e\n\nWhat's next\n-----------\n\n- [Read from Apache Iceberg](/dataflow/docs/guides/read-from-iceberg).\n- Learn more about [Managed I/O](/dataflow/docs/guides/managed-io)."]]