Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ on:
description: Additional environment variables to set when running the tests. Should be in JSON format.
required: false
type: string
default: '{"PYSPARK_IMAGE_TO_TEST": "python-312", "PYTHON_TO_TEST": "python3.12"}'
default: '{"PYSPARK_IMAGE_TO_TEST": "python-312", "PYTHON_TO_TEST": "python3.12", "SPARK_SQL_WSCG_UNION_ENABLED": "true"}'
jobs:
description: >-
Jobs to run, and should be in JSON format. The values should be matched with the job's key defined
Expand Down Expand Up @@ -1417,6 +1417,7 @@ jobs:
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_SQL_WSCG_UNION_ENABLED: ${{ fromJSON(inputs.envs).SPARK_SQL_WSCG_UNION_ENABLED }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.preferSortMergeJoin=true
Expand All @@ -1425,13 +1426,15 @@ jobs:
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_SQL_WSCG_UNION_ENABLED: ${{ fromJSON(inputs.envs).SPARK_SQL_WSCG_UNION_ENABLED }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=10485760
- name: Run TPC-DS queries (Shuffled hash join)
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_SQL_WSCG_UNION_ENABLED: ${{ fromJSON(inputs.envs).SPARK_SQL_WSCG_UNION_ENABLED }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.forceApplyShuffledHashJoin=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2526,7 +2526,7 @@ object SQLConf {
.version("4.2.0")
.withBindingPolicy(ConfigBindingPolicy.SESSION)
.booleanConf
.createWithDefault(false)
.createWithDefault(sys.env.get("SPARK_SQL_WSCG_UNION_ENABLED").contains("true"))

val WHOLESTAGE_UNION_MAX_CHILDREN =
buildConf("spark.sql.codegen.wholeStage.union.maxChildren")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite {
// Disable char/varchar read-side handling for better performance.
SQLConf.READ_SIDE_CHAR_PADDING.key -> "false",
SQLConf.LEGACY_NO_CHAR_PADDING_IN_PREDICATE.key -> "true",
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") {
SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB",
SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
Comment thread
LuciferYang marked this conversation as resolved.
Outdated
val qe = sql(queryString).queryExecution
val plan = qe.executedPlan
val explain = normalizeLocation(normalizeIds(qe.explainString(FormattedMode)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class SQLQueryTestSuite extends SharedSparkSession
// regex magic.
.set("spark.test.noSerdeInExplain", "true")
.set(SQLConf.SCHEMA_LEVEL_COLLATIONS_ENABLED, true)
.set(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED, false)
Comment thread
LuciferYang marked this conversation as resolved.
Outdated

// SPARK-32106 Since we add SQL test 'transform.sql' will use `cat` command,
// here we need to ignore it.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -726,16 +726,18 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
}

test("SPARK-25278: output metrics are wrong for plans repeated in the query") {
  // The expected node-id -> metric mapping below assumes Union is NOT collapsed
  // into a whole-stage-codegen node. With union whole-stage codegen enabled the
  // plan shape (and therefore the node ids) changes, so pin the flag off here.
  withSQLConf(SQLConf.WHOLESTAGE_UNION_CODEGEN_ENABLED.key -> "false") {
    val name = "demo_view"
    withView(name) {
      sql(s"CREATE OR REPLACE VIEW $name AS VALUES 1,2")
      // Reference the same view twice so the scan appears as two repeated
      // sub-plans; each side must report its own output-row metric.
      val view = spark.table(name)
      val union = view.union(view)
      testSparkPlanMetrics(union, 1, Map(
        0L -> ("Union" -> Map()),
        1L -> ("Project" -> Map()),
        2L -> ("LocalTableScan" -> Map("number of output rows" -> 2L)),
        3L -> ("LocalTableScan" -> Map("number of output rows" -> 2L))))
    }
  }
}

Expand Down