 
 package com.example.dataflow;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import com.google.common.collect.ImmutableMap;
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.PrintStream;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.UUID;
+import org.apache.beam.sdk.PipelineResult;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.CatalogProperties;
 import org.junit.Test;
 
 public class ApacheIcebergIT {
-  private ByteArrayOutputStream bout;
-  private final PrintStream originalOut = System.out;
-
-  private static final String CATALOG_NAME = "local";
-  private static final String TABLE_NAME = "table1";
-  private static final TableIdentifier TABLE_IDENTIFIER = TableIdentifier.of(TABLE_NAME);
-
-  // The output file that the Dataflow pipeline writes.
-  private static final String OUTPUT_FILE_NAME_PREFIX = UUID.randomUUID().toString();
-  private static final String OUTPUT_FILE_NAME = OUTPUT_FILE_NAME_PREFIX + "-00000-of-00001.txt";
 
   private Configuration hadoopConf = new Configuration();
   private java.nio.file.Path warehouseDirectory;
   private String warehouseLocation;
   private Catalog catalog;
-  private Table table;
+  private static final String CATALOG_NAME = "local";
 
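+  // The output file that the Dataflow pipeline writes.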
+  String outputFileNamePrefix = UUID.randomUUID().toString();
+  String outputFileName = outputFileNamePrefix + "-00000-of-00001.txt";
 
-  private void createIcebergTable(Catalog catalog, TableIdentifier tableId) {
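+  // Creates an Iceberg table with the given name in the local test catalog.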
+  private Table createIcebergTable(String name) {
+
+    TableIdentifier tableId = TableIdentifier.of(name);
 
     // This schema represents an Iceberg table schema. It needs to match the
     // org.apache.beam.sdk.schemas.Schema that is defined in ApacheIcebergWrite. However, these
@@ -79,10 +73,10 @@ private void createIcebergTable(Catalog catalog, TableIdentifier tableId) {
         NestedField.required(1, "id", Types.LongType.get()),
         NestedField.optional(2, "name", Types.StringType.get()));
 
-    table = catalog.createTable(tableId, schema);
+    return catalog.createTable(tableId, schema);
   }
 
-  private void writeTableRecord()
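+  // Seeds the given table with one record, written directly through the Iceberg API.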
+  private void writeTableRecord(Table table)
       throws IOException {
     GenericRecord record = GenericRecord.create(table.schema());
     record.setField("id", 0L);
@@ -109,72 +103,94 @@ private void writeTableRecord()
         .commit();
   }
 
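+  // Returns true if any record in the table contains the given string in its string form.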
+  private boolean tableContainsRecord(Table table, String data) {
+    CloseableIterable<Record> records = IcebergGenerics.read(table).build();
+    for (Record r : records) {
+      if (r.toString().contains(data)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   @Before
   public void setUp() throws IOException {
-    bout = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(bout));
-
-    // Create an Apache Iceberg catalog with a table.
+    // Create an Apache Iceberg catalog. Each test creates its own tables.
     warehouseDirectory = Files.createTempDirectory("test-warehouse");
     warehouseLocation = "file:" + warehouseDirectory.toString();
-    System.out.println(warehouseLocation);
     catalog =
         CatalogUtil.loadCatalog(
             CatalogUtil.ICEBERG_CATALOG_HADOOP,
             CATALOG_NAME,
             ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation),
             hadoopConf);
-    createIcebergTable(catalog, TABLE_IDENTIFIER);
+
   }
 
   @After
   public void tearDown() throws IOException {
-    Files.deleteIfExists(Paths.get(OUTPUT_FILE_NAME));
-    System.setOut(originalOut);
+    Files.deleteIfExists(Paths.get(outputFileName));
   }
 
   @Test
   public void testApacheIcebergWrite() {
+    String tableName = "write_table";
+    final Table table = createIcebergTable(tableName);
+
     // Run the Dataflow pipeline.
     ApacheIcebergWrite.main(
         new String[] {
           "--runner=DirectRunner",
           "--warehouseLocation=" + warehouseLocation,
           "--catalogName=" + CATALOG_NAME,
-          "--tableName=" + TABLE_NAME
+          "--tableName=" + tableName
         });
 
     // Verify that the pipeline wrote records to the table.
-    Table table = catalog.loadTable(TABLE_IDENTIFIER);
-    CloseableIterable<Record> records = IcebergGenerics.read(table)
-        .build();
-    for (Record r : records) {
-      System.out.println(r);
-    }
+    assertTrue(tableContainsRecord(table, "0, Alice"));
+    assertTrue(tableContainsRecord(table, "1, Bob"));
+    assertTrue(tableContainsRecord(table, "2, Charles"));
+  }
+
+  @Test
+  public void testApacheIcebergDynamicDestinations() {
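+    // Create a destination table for each airport code.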
+    final Table tableORD = createIcebergTable("flights-ORD");
+    final Table tableSYD = createIcebergTable("flights-SYD");
+
+    // Run the Dataflow pipeline.
+    ApacheIcebergDynamicDestinations.main(
+        new String[] {
+          "--runner=DirectRunner",
+          "--warehouseLocation=" + warehouseLocation,
+          "--catalogName=" + CATALOG_NAME
+        });
 
-    String got = bout.toString();
-    assertTrue(got.contains("0, Alice"));
-    assertTrue(got.contains("1, Bob"));
-    assertTrue(got.contains("2, Charles"));
+    // Verify that the pipeline wrote records to the correct tables.
+    assertTrue(tableContainsRecord(tableORD, "0, Alice"));
+    assertTrue(tableContainsRecord(tableORD, "2, Charles"));
+    assertTrue(tableContainsRecord(tableSYD, "1, Bob"));
   }
 
   @Test
   public void testApacheIcebergRead() throws IOException {
+    String tableName = "read_table";
+    final Table table = createIcebergTable(tableName);
+
     // Seed the Apache Iceberg table with data.
-    writeTableRecord();
+    writeTableRecord(table);
 
     // Run the Dataflow pipeline.
     ApacheIcebergRead.main(
         new String[] {
           "--runner=DirectRunner",
           "--warehouseLocation=" + warehouseLocation,
           "--catalogName=" + CATALOG_NAME,
-          "--tableName=" + TABLE_NAME,
-          "--outputPath=" + OUTPUT_FILE_NAME_PREFIX
+          "--tableName=" + tableName,
+          "--outputPath=" + outputFileNamePrefix
         });
 
-    // Verify the pipeline wrote the table data to a local file.
-    String output = Files.readString(Paths.get(OUTPUT_FILE_NAME));
+    // Verify the pipeline wrote the table data to a text file.
+    String output = Files.readString(Paths.get(outputFileName));
     assertTrue(output.contains("0:Person-0"));
   }
-}
+}