
Commit 360e199

Added direct and indirect write methods to DSv1 (#692)
Set OLD_INDIRECT as the default write method in the GA connector
Parent: 3f4593f
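For orientation, here is a hedged sketch of how a Spark job could pick one of the write methods this commit wires into DSv1. The "writeMethod" option name and the DIRECT/INDIRECT/OLD_INDIRECT values come from the SparkBigQueryConfig diff below; the app name, dataset and table are placeholders.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class WriteMethodExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("write-method-example").getOrCreate();
    Dataset<Row> df = spark.range(10).toDF("id");
    df.write()
        .format("bigquery")
        // One of DIRECT, INDIRECT or OLD_INDIRECT (see the WriteMethod enum below).
        .option("writeMethod", "DIRECT")
        .mode(SaveMode.Append)
        .save("my_dataset.my_table"); // placeholder table reference
  }
}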

File tree

25 files changed: +1037 −545 lines


spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorBuilder.java

+7 −1
@@ -29,6 +29,7 @@ public class InjectorBuilder {
   private SparkSession spark = SparkSession.active();
   private Optional<StructType> schema = Optional.empty();
   private Map<String, String> options = ImmutableMap.<String, String>of();
+  private Map<String, String> customDefaults = ImmutableMap.<String, String>of();
   private boolean tableIsMandatory = true;
   private DataSourceVersion dataSourceVersion = DataSourceVersion.V2;
 
@@ -57,10 +58,15 @@ public InjectorBuilder withDataSourceVersion(DataSourceVersion dataSourceVersion
     return this;
   }
 
+  public InjectorBuilder withCustomDefaults(Map<String, String> customDefaults) {
+    this.customDefaults = customDefaults;
+    return this;
+  }
+
   public Injector build() {
     return Guice.createInjector(
         new BigQueryClientModule(),
         new SparkBigQueryConnectorModule(
-            spark, options, schema, dataSourceVersion, tableIsMandatory));
+            spark, options, customDefaults, schema, dataSourceVersion, tableIsMandatory));
   }
 }
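A minimal sketch of how a caller (for example, the GA connector's DSv1 entry point) might route OLD_INDIRECT through the new builder method. Only withCustomDefaults and build come from this diff; the no-arg construction is assumed from the field initializers above, and the defaults only take effect when no writeMethod option is set (see the SparkBigQueryConfig diff below).

import com.google.cloud.spark.bigquery.InjectorBuilder;
import com.google.cloud.spark.bigquery.SparkBigQueryConfig;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Injector;

public class CustomDefaultsSketch {
  // Sketch: build an injector whose config falls back to OLD_INDIRECT when neither
  // the per-write options nor the global options carry a writeMethod entry.
  static SparkBigQueryConfig buildConfigWithOldIndirectDefault() {
    Injector injector =
        new InjectorBuilder()
            .withCustomDefaults(
                ImmutableMap.of(SparkBigQueryConfig.WRITE_METHOD_PARAM, "OLD_INDIRECT"))
            .build();
    return injector.getInstance(SparkBigQueryConfig.class);
  }
}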

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorFactory.java

+8 −1
@@ -16,6 +16,7 @@
 package com.google.cloud.spark.bigquery;
 
 import com.google.cloud.bigquery.connector.common.BigQueryClientModule;
+import com.google.common.collect.ImmutableMap;
 import com.google.inject.Guice;
 import com.google.inject.Injector;
 import java.util.Map;
@@ -37,9 +38,15 @@ public static Injector createInjector(
       StructType schema,
       Map<String, String> options,
       boolean tableIsMandatory) {
+    Map<String, String> customDefaults = ImmutableMap.of();
     return Guice.createInjector(
         new BigQueryClientModule(),
         new SparkBigQueryConnectorModule(
-            spark, options, Optional.ofNullable(schema), DataSourceVersion.V2, tableIsMandatory));
+            spark,
+            options,
+            customDefaults,
+            Optional.ofNullable(schema),
+            DataSourceVersion.V2,
+            tableIsMandatory));
   }
 }

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java

+14 −5
@@ -78,7 +78,8 @@ public class SparkBigQueryConfig
 
   public enum WriteMethod {
     DIRECT,
-    INDIRECT;
+    INDIRECT,
+    OLD_INDIRECT;
 
     public static WriteMethod from(@Nullable String writeMethod) {
       try {
@@ -96,6 +97,7 @@ public static WriteMethod from(@Nullable String writeMethod) {
   public static final String VALIDATE_SPARK_AVRO_PARAM = "validateSparkAvroInternalParam";
   public static final String ENABLE_LIST_INFERENCE = "enableListInference";
   public static final String INTERMEDIATE_FORMAT_OPTION = "intermediateFormat";
+  public static final String WRITE_METHOD_PARAM = "writeMethod";
   @VisibleForTesting static final DataFormat DEFAULT_READ_DATA_FORMAT = DataFormat.ARROW;
 
   @VisibleForTesting
@@ -186,6 +188,7 @@ public static WriteMethod from(@Nullable String writeMethod) {
   // the catalog ones
   public static SparkBigQueryConfig from(
       Map<String, String> options,
+      ImmutableMap<String, String> customDefaults,
       DataSourceVersion dataSourceVersion,
       SparkSession spark,
       Optional<StructType> schema,
@@ -196,6 +199,7 @@ public static SparkBigQueryConfig from(
         ImmutableMap.copyOf(optionsMap),
         ImmutableMap.copyOf(mapAsJavaMap(spark.conf().getAll())),
         spark.sparkContext().hadoopConfiguration(),
+        customDefaults,
         spark.sparkContext().defaultParallelism(),
         spark.sqlContext().conf(),
         spark.version(),
@@ -208,6 +212,7 @@ public static SparkBigQueryConfig from(
       Map<String, String> optionsInput,
       ImmutableMap<String, String> originalGlobalOptions,
       Configuration hadoopConfiguration,
+      ImmutableMap<String, String> customDefaults,
       int defaultParallelism,
       SQLConf sqlConf,
       String sparkVersion,
@@ -375,10 +380,14 @@ public static SparkBigQueryConfig from(
             .transform(String::toUpperCase)
             .or(DEFAULT_ARROW_COMPRESSION_CODEC.toString());
 
+    WriteMethod writeMethodDefault =
+        Optional.ofNullable(customDefaults.get(WRITE_METHOD_PARAM))
+            .map(WriteMethod::from)
+            .orElse(DEFAULT_WRITE_METHOD);
     config.writeMethod =
-        getAnyOption(globalOptions, options, "writeMethod")
+        getAnyOption(globalOptions, options, WRITE_METHOD_PARAM)
             .transform(WriteMethod::from)
-            .or(DEFAULT_WRITE_METHOD);
+            .or(writeMethodDefault);
 
     try {
       config.arrowCompressionCodec = CompressionCodec.valueOf(arrowCompressionCodecParam);
@@ -844,8 +853,8 @@ static boolean isSpark24OrAbove(String sparkVersion) {
   }
 
   // could not load the spark-avro data source
-  private static IllegalStateException missingAvroException(
-      String sparkVersion, Exception cause) {
+  @VisibleForTesting
+  static IllegalStateException missingAvroException(String sparkVersion, Exception cause) {
     String avroPackage;
     if (isSpark24OrAbove(sparkVersion)) {
       String scalaVersion = scala.util.Properties.versionNumberString();
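The lookup order the writeMethod hunk implements is: explicit option (per-write or global) first, then the caller-supplied custom default, then the hard-coded DEFAULT_WRITE_METHOD. A self-contained illustration of that precedence, using plain java.util types rather than the connector's getAnyOption helper:

import java.util.Map;
import java.util.Optional;

public class PrecedenceSketch {
  // Illustration only: mirrors getAnyOption(...).or(writeMethodDefault) from the diff above.
  static String resolveWriteMethod(
      Map<String, String> options, Map<String, String> customDefaults, String hardDefault) {
    String defaultValue =
        Optional.ofNullable(customDefaults.get("writeMethod")).orElse(hardDefault);
    return Optional.ofNullable(options.get("writeMethod")).orElse(defaultValue);
  }

  public static void main(String[] args) {
    Map<String, String> customDefaults = Map.of("writeMethod", "OLD_INDIRECT");
    // No explicit option: the custom default wins over the hard-coded one.
    System.out.println(resolveWriteMethod(Map.of(), customDefaults, "INDIRECT"));
    // Explicit option: it wins over both defaults.
    System.out.println(
        resolveWriteMethod(Map.of("writeMethod", "DIRECT"), customDefaults, "INDIRECT"));
  }
}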

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorModule.java

+11 −1
@@ -17,6 +17,7 @@
 
 import com.google.cloud.bigquery.connector.common.BigQueryConfig;
 import com.google.cloud.bigquery.connector.common.UserAgentProvider;
+import com.google.common.collect.ImmutableMap;
 import com.google.inject.Binder;
 import com.google.inject.Module;
 import com.google.inject.Provides;
@@ -30,18 +31,21 @@ public class SparkBigQueryConnectorModule implements Module {
 
   private final SparkSession spark;
   private final Map<String, String> options;
+  private final Map<String, String> customDefaults;
   private final Optional<StructType> schema;
   private final DataSourceVersion dataSourceVersion;
   private final boolean tableIsMandatory;
 
   public SparkBigQueryConnectorModule(
       SparkSession spark,
       Map<String, String> options,
+      Map<String, String> customDefaults,
       Optional<StructType> schema,
       DataSourceVersion dataSourceVersion,
       boolean tableIsMandatory) {
     this.spark = spark;
     this.options = options;
+    this.customDefaults = customDefaults;
     this.schema = schema;
     this.dataSourceVersion = dataSourceVersion;
     this.tableIsMandatory = tableIsMandatory;
@@ -67,7 +71,13 @@ public DataSourceVersion provideDataSourceVersion() {
   @Singleton
   @Provides
   public SparkBigQueryConfig provideSparkBigQueryConfig() {
-    return SparkBigQueryConfig.from(options, dataSourceVersion, spark, schema, tableIsMandatory);
+    return SparkBigQueryConfig.from(
+        options,
+        ImmutableMap.copyOf(customDefaults),
+        dataSourceVersion,
+        spark,
+        schema,
+        tableIsMandatory);
   }
 
   @Singleton

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/util/HdfsUtils.java

+51 −0
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2022 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.spark.bigquery.util;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Iterator;
+import org.apache.hadoop.fs.RemoteIterator;
+
+public class HdfsUtils {
+
+  /** Converts an HDFS RemoteIterator to a java.util.Iterator. */
+  public static <T> Iterator<T> toJavaUtilIterator(final RemoteIterator<T> remoteIterator) {
+    return new Iterator<T>() {
+      @Override
+      public boolean hasNext() {
+        try {
+          return remoteIterator.hasNext();
+        } catch (IOException e) {
+          throw new UncheckedIOException(e);
+        }
+      }
+
+      @Override
+      public T next() {
+        try {
+          return remoteIterator.next();
+        } catch (IOException e) {
+          throw new UncheckedIOException(e);
+        }
+      }
+    };
+  }
+
+  public static <T> Iterable<T> toJavaUtilIterable(final RemoteIterator<T> remoteIterator) {
+    return () -> toJavaUtilIterator(remoteIterator);
+  }
+}
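A short usage sketch for the new helper: Hadoop's FileSystem.listFiles returns a RemoteIterator, which cannot drive an enhanced for-loop directly, and the adapter above bridges that gap. The wrapper class and method here are illustrative, not part of the commit.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import com.google.cloud.spark.bigquery.util.HdfsUtils;

class HdfsUtilsExample {
  // Prints every file directly under dir; IOExceptions raised during iteration surface
  // as UncheckedIOException, per the adapter above.
  static void printFiles(FileSystem fs, Path dir) throws IOException {
    for (LocatedFileStatus status : HdfsUtils.toJavaUtilIterable(fs.listFiles(dir, false))) {
      System.out.println(status.getPath());
    }
  }
}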

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDataSourceWriterInsertableRelation.java

+75 −0
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2022 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.spark.bigquery.write;
+
+import com.google.cloud.bigquery.connector.common.BigQueryClient;
+import com.google.cloud.spark.bigquery.SparkBigQueryConfig;
+import com.google.cloud.spark.bigquery.write.context.DataSourceWriterContext;
+import com.google.cloud.spark.bigquery.write.context.WriterCommitMessageContext;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+
+public class BigQueryDataSourceWriterInsertableRelation extends BigQueryInsertableRelationBase {
+
+  private final DataSourceWriterContext ctx;
+
+  public BigQueryDataSourceWriterInsertableRelation(
+      BigQueryClient bigQueryClient,
+      SQLContext sqlContext,
+      SparkBigQueryConfig config,
+      DataSourceWriterContext ctx) {
+    super(bigQueryClient, sqlContext, config);
+    this.ctx = ctx;
+  }
+
+  @Override
+  public void insert(Dataset<Row> data, boolean overwrite) {
+    logger.debug("Inserting data={}, overwrite={}", data, overwrite);
+
+    // Here we mimic the DataSource v2 API behaviour in order to reuse the shared code. The
+    // partition handler iterates on each partition separately, invoking the DataWriter
+    // interface. The result of the iteration is a WriterCommitMessageContext, which is used
+    // to perform the global commit, or to abort if needed.
+    try {
+      DataSourceWriterContextPartitionHandler partitionHandler =
+          new DataSourceWriterContextPartitionHandler(
+              ctx.createWriterContextFactory(), System.currentTimeMillis());
+
+      JavaRDD<Row> rowsRDD = data.toJavaRDD();
+      int numPartitions = rowsRDD.getNumPartitions();
+      JavaRDD<WriterCommitMessageContext> writerCommitMessagesRDD =
+          rowsRDD.mapPartitionsWithIndex(partitionHandler, false);
+      WriterCommitMessageContext[] writerCommitMessages =
+          writerCommitMessagesRDD.collect().toArray(new WriterCommitMessageContext[0]);
+      if (writerCommitMessages.length == numPartitions) {
+        ctx.commit(writerCommitMessages);
+      } else {
+        // missing commit messages, so abort
+        logger.warn(
+            "It seems that {} out of {} partitions have failed, aborting",
+            numPartitions - writerCommitMessages.length,
+            numPartitions);
+        ctx.abort(writerCommitMessages);
+      }
+    } catch (Exception e) {
+      logger.warn("Unexpected issue trying to save " + data, e);
+      ctx.abort(new WriterCommitMessageContext[] {});
+    }
+  }
+}
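DataSourceWriterContextPartitionHandler itself is not in this excerpt, but JavaRDD.mapPartitionsWithIndex constrains its shape: it must be a Function2 that consumes one partition's rows and yields a commit message for each partition it writes successfully, which is what makes the commit-message count comparable with the partition count above. A hedged, self-contained sketch of that contract, with placeholder types and logic in place of the real writer wiring:

import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.sql.Row;

// Placeholder stand-in for WriterCommitMessageContext; illustration only.
class SketchCommitMessage implements Serializable {
  final int partitionId;
  final long rowCount;

  SketchCommitMessage(int partitionId, long rowCount) {
    this.partitionId = partitionId;
    this.rowCount = rowCount;
  }
}

class PartitionHandlerSketch
    implements Function2<Integer, Iterator<Row>, Iterator<SketchCommitMessage>> {
  @Override
  public Iterator<SketchCommitMessage> call(Integer partitionId, Iterator<Row> rows) {
    long count = 0;
    while (rows.hasNext()) {
      rows.next(); // a real handler would hand each row to a DataWriter here
      count++;
    }
    // Emit exactly one commit message for this successfully processed partition.
    return Collections.singletonList(new SketchCommitMessage(partitionId, count)).iterator();
  }
}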

spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDeprecatedIndirectInsertableRelation.java

+45 −0
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2022 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.spark.bigquery.write;
+
+import com.google.cloud.bigquery.connector.common.BigQueryClient;
+import com.google.cloud.spark.bigquery.SparkBigQueryConfig;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SaveMode;
+
+/**
+ * The original indirect insertable relation, using Spark's write. Intermediate formats are
+ * Parquet, ORC or Avro. Deprecated in favor of BigQueryDataSourceWriterInsertableRelation.
+ */
+public class BigQueryDeprecatedIndirectInsertableRelation extends BigQueryInsertableRelationBase {
+
+  public BigQueryDeprecatedIndirectInsertableRelation(
+      BigQueryClient bigQueryClient, SQLContext sqlContext, SparkBigQueryConfig config) {
+    super(bigQueryClient, sqlContext, config);
+  }
+
+  @Override
+  public void insert(Dataset<Row> data, boolean overwrite) {
+    logger.debug("Inserting data={}, overwrite={}", data, overwrite);
+    // the helper also supports the v2 API
+    SaveMode saveMode = overwrite ? SaveMode.Overwrite : SaveMode.Append;
+    BigQueryWriteHelper helper =
+        new BigQueryWriteHelper(bigQueryClient, sqlContext, saveMode, config, data, exists());
+    helper.writeDataFrameToBigQuery();
+  }
+}
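To exercise this deprecated path after the change, a job would opt in explicitly (or run through the GA connector, which now defaults to OLD_INDIRECT). A hedged sketch follows: intermediateFormat is the option defined in SparkBigQueryConfig above, temporaryGcsBucket is assumed to be the connector's existing staging-bucket option, and all values are placeholders.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

class OldIndirectWriteSketch {
  static void writeViaOldIndirect(Dataset<Row> df) {
    df.write()
        .format("bigquery")
        .option("writeMethod", "OLD_INDIRECT")
        // Parquet, ORC or Avro, per the class Javadoc above.
        .option("intermediateFormat", "parquet")
        // assumed existing staging option; value is a placeholder
        .option("temporaryGcsBucket", "some-bucket")
        .mode(SaveMode.Append)
        .save("my_dataset.my_table"); // placeholder table reference
  }
}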
