Commit 88d550b

Support for incremental materialization with ingestion time partition tables
1 parent 3d69869 commit 88d550b

File tree

6 files changed: +263 -35 lines changed


CHANGELOG.md (+4)

@@ -4,6 +4,10 @@
 - Use dbt.tests.adapter.basic in tests (new test framework) ([#135](https://github.com/dbt-labs/dbt-bigquery/issues/135), [#142](https://github.com/dbt-labs/dbt-bigquery/pull/142))
 - Adding pre-commit and black formatter hooks ([#147](https://github.com/dbt-labs/dbt-bigquery/pull/147))
 - Adding pre-commit code changes ([#148](https://github.com/dbt-labs/dbt-bigquery/pull/148))
+- Add support for ingestion time partitioned table using incremental materialization ([#136](https://github.com/dbt-labs/dbt-bigquery/pull/136))
+
+### Contributors
+- [@Kayrnt](https://github.com/Kayrnt) ([#136](https://github.com/dbt-labs/dbt-bigquery/pull/136))
 
 ## dbt-bigquery 1.1.0b1 (March 23, 2022)
 ### Features
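For orientation before the code diffs: a model opts into the feature through its partition_by configuration, which dbt parses into the PartitionConfig dataclass extended below in impl.py. A hypothetical sketch of such a config dict (the column name is invented for illustration):

# Hypothetical partition_by config for an incremental model; the keys map
# onto the PartitionConfig fields shown in the impl.py diff below.
partition_by = {
    "field": "created_at",
    "data_type": "timestamp",
    "granularity": "day",
    "time_ingestion_partitioning": True,
}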

dbt/adapters/bigquery/connections.py (+19, -5)

@@ -1,11 +1,11 @@
 import json
 import re
 from contextlib import contextmanager
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from functools import lru_cache
 import agate
 from requests.exceptions import ConnectionError
-from typing import Optional, Any, Dict, Tuple
+from typing import Optional, Any, Dict, Tuple, List
 
 import google.auth
 import google.auth.exceptions
@@ -86,6 +86,7 @@ class BigQueryConnectionMethod(StrEnum):
 @dataclass
 class BigQueryAdapterResponse(AdapterResponse):
     bytes_processed: Optional[int] = None
+    fields: List[Any] = field(default_factory=list)
 
 
 @dataclass
@@ -434,6 +435,7 @@ def execute(
         code = None
         num_rows = None
         bytes_processed = None
+        fields = list()
 
         if query_job.statement_type == "CREATE_VIEW":
             code = "CREATE VIEW"
@@ -448,6 +450,7 @@
             bytes_processed = query_job.total_bytes_processed
             processed_bytes = self.format_bytes(bytes_processed)
             message = f"{code} ({num_rows_formated} rows, {processed_bytes} processed)"
+            fields = query_table.schema
 
         elif query_job.statement_type == "SCRIPT":
             code = "SCRIPT"
@@ -473,9 +476,14 @@
             bytes_processed = query_job.total_bytes_processed
             processed_bytes = self.format_bytes(bytes_processed)
             message = f"{code} ({num_rows_formated} rows, {processed_bytes} processed)"
+            fields = query_table.schema
 
         response = BigQueryAdapterResponse(  # type: ignore[call-arg]
-            _message=message, rows_affected=num_rows, code=code, bytes_processed=bytes_processed
+            _message=message,
+            rows_affected=num_rows,
+            code=code,
+            bytes_processed=bytes_processed,
+            fields=fields,
         )
 
         return response, table
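Taken together, these hunks thread the result schema of the executed query (query_table.schema) through to the adapter response. A minimal sketch of what a caller can now read off the response; the SchemaField values are illustrative, not taken from the commit:

from google.cloud.bigquery import SchemaField

from dbt.adapters.bigquery.connections import BigQueryAdapterResponse

# After a SELECT-style statement, `fields` carries the destination table's
# schema, so callers can recover column names and types from the response.
response = BigQueryAdapterResponse(  # type: ignore[call-arg]
    _message="SELECT (2 rows, 1.1 KiB processed)",
    rows_affected=2,
    code="SELECT",
    bytes_processed=1100,
    fields=[SchemaField("id", "INT64"), SchemaField("created_at", "TIMESTAMP")],
)
print([(f.name, f.field_type) for f in response.fields])
# [('id', 'INT64'), ('created_at', 'TIMESTAMP')]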
@@ -529,7 +537,8 @@ def copy_and_results():
 
         self._retry_and_handle(
             msg='copy table "{}" to "{}"'.format(
-                ", ".join(source_ref.path for source_ref in source_ref_array), destination_ref.path
+                ", ".join(source_ref.path for source_ref in source_ref_array),
+                destination_ref.path,
             ),
             conn=conn,
             fn=copy_and_results,
@@ -571,7 +580,12 @@ def fn():
         self._retry_and_handle(msg="create dataset", conn=conn, fn=fn)
 
     def _query_and_results(
-        self, client, sql, job_params, job_creation_timeout=None, job_execution_timeout=None
+        self,
+        client,
+        sql,
+        job_params,
+        job_creation_timeout=None,
+        job_execution_timeout=None,
     ):
         """Query the client and wait for results."""
         # Cannot reuse job_config if destination is set and ddl is used

dbt/adapters/bigquery/impl.py (+46, -10)

@@ -7,7 +7,13 @@
 import dbt.clients.agate_helper
 
 from dbt import ui  # type: ignore
-from dbt.adapters.base import BaseAdapter, available, RelationType, SchemaSearchMap, AdapterConfig
+from dbt.adapters.base import (
+    BaseAdapter,
+    available,
+    RelationType,
+    SchemaSearchMap,
+    AdapterConfig,
+)
 from dbt.adapters.bigquery.relation import BigQueryRelation
 from dbt.adapters.bigquery import BigQueryColumn
 from dbt.adapters.bigquery import BigQueryConnectionManager
@@ -47,11 +53,15 @@ class PartitionConfig(dbtClassMixin):
     data_type: str = "date"
     granularity: str = "day"
     range: Optional[Dict[str, Any]] = None
+    time_ingestion_partitioning: bool = False
+
+    def reject_partition_field_column(self, columns: List[Any]) -> List[str]:
+        return [c for c in columns if not c.name.upper() == self.field.upper()]
 
     def render(self, alias: Optional[str] = None):
-        column: str = self.field
+        column: str = self.field if not self.time_ingestion_partitioning else "_PARTITIONTIME"
         if alias:
-            column = f"{alias}.{self.field}"
+            column = f"{alias}.{column}"
 
         if self.data_type.lower() == "int64" or (
             self.data_type.lower() == "date" and self.granularity.lower() == "day"
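With the new flag set, render() swaps the configured column for BigQuery's _PARTITIONTIME pseudo-column, and the alias now prefixes whichever column was chosen rather than always the user-facing field. A minimal sketch of the difference, assuming render() falls through to a {data_type}_trunc(...) expression for timestamp/day partitions as in the surrounding code (names illustrative):

from dbt.adapters.bigquery.impl import PartitionConfig

regular = PartitionConfig(field="created_at", data_type="timestamp")
ingestion = PartitionConfig(
    field="created_at", data_type="timestamp", time_ingestion_partitioning=True
)

# The alias wraps the chosen column, so the pseudo-column is aliased too.
print(regular.render(alias="src"))    # timestamp_trunc(src.created_at, day)
print(ingestion.render(alias="src"))  # timestamp_trunc(src._PARTITIONTIME, day)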
@@ -89,7 +99,11 @@ def render(self):
 
 def _stub_relation(*args, **kwargs):
     return BigQueryRelation.create(
-        database="", schema="", identifier="", quote_policy={}, type=BigQueryRelation.Table
+        database="",
+        schema="",
+        identifier="",
+        quote_policy={},
+        type=BigQueryRelation.Table,
     )
 
 
@@ -209,14 +223,22 @@ def check_schema_exists(self, database: str, schema: str) -> bool:
     def get_columns_in_relation(self, relation: BigQueryRelation) -> List[BigQueryColumn]:
         try:
             table = self.connections.get_bq_table(
-                database=relation.database, schema=relation.schema, identifier=relation.identifier
+                database=relation.database,
+                schema=relation.schema,
+                identifier=relation.identifier,
             )
             return self._get_dbt_columns_from_bq_table(table)
 
         except (ValueError, google.cloud.exceptions.NotFound) as e:
             logger.debug("get_columns_in_relation error: {}".format(e))
             return []
 
+    @available.parse(lambda *a, **k: [])
+    def add_time_ingestion_partition_column(self, columns) -> List[BigQueryColumn]:
+        "Add time ingestion partition column to columns list"
+        columns.append(self.Column("_PARTITIONTIME", "TIMESTAMP", None, "NULLABLE"))
+        return columns
+
     def expand_column_types(self, goal: BigQueryRelation, current: BigQueryRelation) -> None:  # type: ignore[override]
         # This is a no-op on BigQuery
         pass
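The two new helpers are complementary when a macro builds the column list for an insert into an ingestion-time-partitioned table: one appends the _PARTITIONTIME pseudo-column, the other strips the configured partition field. A self-contained sketch (column values invented for illustration):

from dbt.adapters.bigquery import BigQueryColumn
from dbt.adapters.bigquery.impl import PartitionConfig

columns = [
    BigQueryColumn("id", "INT64", None, "NULLABLE"),
    BigQueryColumn("created_at", "TIMESTAMP", None, "NULLABLE"),
]

# What add_time_ingestion_partition_column does: append the pseudo-column.
columns.append(BigQueryColumn("_PARTITIONTIME", "TIMESTAMP", None, "NULLABLE"))

# reject_partition_field_column drops the configured field, matching names
# case-insensitively.
partition_by = PartitionConfig(field="created_at", time_ingestion_partitioning=True)
print([c.name for c in partition_by.reject_partition_field_column(columns)])
# ['id', '_PARTITIONTIME']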
@@ -358,7 +380,10 @@ def _materialize_as_view(self, model: Dict[str, Any]) -> str:
 
         logger.debug("Model SQL ({}):\n{}".format(model_alias, model_sql))
         self.connections.create_view(
-            database=model_database, schema=model_schema, table_name=model_alias, sql=model_sql
+            database=model_database,
+            schema=model_schema,
+            table_name=model_alias,
+            sql=model_sql,
         )
         return "CREATE VIEW"
 
@@ -379,7 +404,10 @@ def _materialize_as_table(
 
         logger.debug("Model SQL ({}):\n{}".format(table_name, model_sql))
         self.connections.create_table(
-            database=model_database, schema=model_schema, table_name=table_name, sql=model_sql
+            database=model_database,
+            schema=model_schema,
+            table_name=table_name,
+            sql=model_sql,
         )
 
         return "CREATE TABLE"
@@ -462,7 +490,8 @@ def _partitions_match(self, table, conf_partition: Optional[PartitionConfig]) -> bool:
         if not is_partitioned and not conf_partition:
             return True
         elif conf_partition and table.time_partitioning is not None:
-            table_field = table.time_partitioning.field.lower()
+            partioning_field = table.time_partitioning.field or "_PARTITIONTIME"
+            table_field = partioning_field.lower()
             table_granularity = table.partitioning_type.lower()
             return (
                 table_field == conf_partition.field.lower()
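The fallback matters because the BigQuery API reports time_partitioning.field as None for tables partitioned by ingestion time, so the old code would raise AttributeError calling .lower() on None; substituting "_PARTITIONTIME" gives the comparison a usable name. A simplified standalone sketch of the rule:

def partition_field(time_partitioning_field):
    # The BigQuery API uses field=None to mean ingestion-time partitioning.
    return (time_partitioning_field or "_PARTITIONTIME").lower()

assert partition_field(None) == "_partitiontime"
assert partition_field("created_at") == "created_at"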
@@ -508,7 +537,9 @@ def is_replaceable(
 
         try:
             table = self.connections.get_bq_table(
-                database=relation.database, schema=relation.schema, identifier=relation.identifier
+                database=relation.database,
+                schema=relation.schema,
+                identifier=relation.identifier,
             )
         except google.cloud.exceptions.NotFound:
             return True
@@ -630,7 +661,12 @@ def load_dataframe(self, database, schema, table_name, agate_table, column_override):
 
     @available.parse_none
     def upload_file(
-        self, local_file_path: str, database: str, table_schema: str, table_name: str, **kwargs
+        self,
+        local_file_path: str,
+        database: str,
+        table_schema: str,
+        table_name: str,
+        **kwargs,
     ) -> None:
         conn = self.connections.get_thread_connection()
         client = conn.handle
