importcom.google.common.collect.ImmutableMap;importjava.util.Map;importorg.apache.beam.sdk.Pipeline;importorg.apache.beam.sdk.io.TextIO;importorg.apache.beam.sdk.managed.Managed;importorg.apache.beam.sdk.options.Description;importorg.apache.beam.sdk.options.PipelineOptions;importorg.apache.beam.sdk.options.PipelineOptionsFactory;importorg.apache.beam.sdk.transforms.MapElements;importorg.apache.beam.sdk.values.PCollectionRowTuple;importorg.apache.beam.sdk.values.TypeDescriptors;publicclassApacheIcebergRead{staticfinalStringCATALOG_TYPE="hadoop";publicinterfaceOptionsextendsPipelineOptions{@Description("The URI of the Apache Iceberg warehouse location")StringgetWarehouseLocation();voidsetWarehouseLocation(Stringvalue);@Description("Path to write the output file")StringgetOutputPath();voidsetOutputPath(Stringvalue);@Description("The name of the Apache Iceberg catalog")StringgetCatalogName();voidsetCatalogName(Stringvalue);@Description("The name of the table to write to")StringgetTableName();voidsetTableName(Stringvalue);}publicstaticvoidmain(String[]args){// Parse the pipeline options passed into the application. Example:// --runner=DirectRunner --warehouseLocation=$LOCATION --catalogName=$CATALOG \// --tableName= $TABLE_NAME --outputPath=$OUTPUT_FILE// For more information, see https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-optionsOptionsoptions=PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);Pipelinepipeline=Pipeline.create(options);// Configure the Iceberg source I/OMapcatalogConfig=ImmutableMap.<String,Object>builder().put("warehouse",options.getWarehouseLocation()).put("type",CATALOG_TYPE).build();ImmutableMap<String,Object>config=ImmutableMap.<String,Object>builder().put("table",options.getTableName()).put("catalog_name",options.getCatalogName()).put("catalog_properties",catalogConfig).build();// Build the pipeline.pipeline.apply(Managed.read(Managed.ICEBERG).withConfig(config)).getSinglePCollection()// Format each record as a string with the format 'id:name'..apply(MapElements.into(TypeDescriptors.strings()).via((row->{returnString.format("%d:%s",row.getInt64("id"),row.getString("name"));})))// Write to a text file..apply(TextIO.write().to(options.getOutputPath()).withNumShards(1).withSuffix(".txt"));pipeline.run().waitUntilFinish();}}
Sauf indication contraire, le contenu de cette page est régi par une licence Creative Commons Attribution 4.0, et les échantillons de code sont régis par une licence Apache 2.0. Pour en savoir plus, consultez les Règles du site Google Developers. Java est une marque déposée d'Oracle et/ou de ses sociétés affiliées.
Dernière mise à jour le 2025/07/11 (UTC).
[[["Facile à comprendre","easyToUnderstand","thumb-up"],["J'ai pu résoudre mon problème","solvedMyProblem","thumb-up"],["Autre","otherUp","thumb-up"]],[["Difficile à comprendre","hardToUnderstand","thumb-down"],["Informations ou exemple de code incorrects","incorrectInformationOrSampleCode","thumb-down"],["Il n'y a pas l'information/les exemples dont j'ai besoin","missingTheInformationSamplesINeed","thumb-down"],["Problème de traduction","translationIssue","thumb-down"],["Autre","otherDown","thumb-down"]],["Dernière mise à jour le 2025/07/11 (UTC)."],[[["The Managed I/O connector facilitates reading from Apache Iceberg to Dataflow, supporting Hadoop, Hive, REST-based catalogs, and BigQuery metastore."],["Managed I/O for Apache Iceberg enables batch reads, batch writes, streaming writes, dynamic destinations, and dynamic table creation."],["For BigQuery tables, the `BigQueryIO` connector is used, requiring pre-existing tables without support for dynamic table creation."],["Reading from Apache Iceberg is achieved by using the `Managed.read(Managed.ICEBERG)` in a pipeline, which can then be transformed and outputted to various destinations, like text files as shown in the provided example."],["To use the managed Iceberg I/O connectors, you must include the `beam-sdks-java-managed` and `beam-sdks-java-io-iceberg` dependencies."]]],[]]