From f402090c1ebec9601e5fef6e45879d3a0a015dbd Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Thu, 26 Oct 2023 21:44:32 +0530
Subject: [PATCH 001/792] feat(ingest): support view lineage for all sqlalchemy
sources (#9039)
---
metadata-ingestion/setup.py | 52 +-
.../src/datahub/configuration/common.py | 2 +-
.../datahub/emitter/sql_parsing_builder.py | 5 +-
.../api/incremental_lineage_helper.py | 13 +-
.../src/datahub/ingestion/api/source.py | 1 +
.../ingestion/source/dbt/dbt_common.py | 5 +
.../source/snowflake/snowflake_lineage_v2.py | 14 +-
.../src/datahub/ingestion/source/sql/hive.py | 83 ++-
.../datahub/ingestion/source/sql/postgres.py | 20 +-
.../ingestion/source/sql/sql_common.py | 126 +++-
.../ingestion/source/sql/sql_config.py | 19 +-
.../datahub/ingestion/source/sql/teradata.py | 54 +-
.../source/sql/two_tier_sql_source.py | 6 +-
.../datahub/ingestion/source/sql/vertica.py | 2 +-
.../source/state/stateful_ingestion_base.py | 3 +-
.../ingestion/source_config/sql/snowflake.py | 12 +-
.../src/datahub/utilities/sqlglot_lineage.py | 21 +-
.../hive/hive_mces_all_db_golden.json | 581 +++++++++++++++---
.../integration/hive/hive_mces_golden.json | 530 ++++++++++++++--
.../tests/integration/hive/hive_setup.sql | 22 +-
.../mysql/mysql_mces_no_db_golden.json | 272 ++++++--
.../postgres_all_db_mces_with_db_golden.json | 324 ++++++++--
..._db_to_file_with_db_estimate_row_count.yml | 2 +-
.../postgres_mces_with_db_golden.json | 264 +++++++-
...res_to_file_with_db_estimate_row_count.yml | 2 +-
.../snowflake/test_snowflake_failures.py | 3 +-
.../trino/trino_hive_mces_golden.json | 211 +++++--
.../test_incremental_lineage_helper.py | 21 +
28 files changed, 2193 insertions(+), 477 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 0b8661b0df5f5..7f7826abe2095 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -101,22 +101,36 @@
"grpcio-tools>=1.44.0,<2",
}
-sql_common = {
- # Required for all SQL sources.
- # This is temporary lower bound that we're open to loosening/tightening as requirements show up
- "sqlalchemy>=1.4.39, <2",
- # Required for SQL profiling.
- "great-expectations>=0.15.12, <=0.15.50",
- # scipy version restricted to reduce backtracking, used by great-expectations,
- "scipy>=1.7.2",
- # GE added handling for higher version of jinja2
- # https://github.com/great-expectations/great_expectations/pull/5382/files
- # datahub does not depend on traitlets directly but great expectations does.
- # https://github.com/ipython/traitlets/issues/741
- "traitlets<5.2.2",
- "greenlet",
+usage_common = {
+ "sqlparse",
+}
+
+sqlglot_lib = {
+ # Using an Acryl fork of sqlglot.
+ # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
+ "acryl-sqlglot==18.5.2.dev45",
}
+sql_common = (
+ {
+ # Required for all SQL sources.
+ # This is a temporary lower bound that we're open to loosening/tightening as requirements show up
+ "sqlalchemy>=1.4.39, <2",
+ # Required for SQL profiling.
+ "great-expectations>=0.15.12, <=0.15.50",
+ # scipy version restricted to reduce backtracking, used by great-expectations,
+ "scipy>=1.7.2",
+ # GE added handling for higher version of jinja2
+ # https://github.com/great-expectations/great_expectations/pull/5382/files
+ # datahub does not depend on traitlets directly but great expectations does.
+ # https://github.com/ipython/traitlets/issues/741
+ "traitlets<5.2.2",
+ "greenlet",
+ }
+ | usage_common
+ | sqlglot_lib
+)
+
sqllineage_lib = {
"sqllineage==1.3.8",
# We don't have a direct dependency on sqlparse but it is a dependency of sqllineage.
@@ -125,12 +139,6 @@
"sqlparse==0.4.4",
}
-sqlglot_lib = {
- # Using an Acryl fork of sqlglot.
- # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
- "acryl-sqlglot==18.5.2.dev45",
-}
-
aws_common = {
# AWS Python SDK
"boto3",
@@ -243,10 +251,6 @@
powerbi_report_server = {"requests", "requests_ntlm"}
-usage_common = {
- "sqlparse",
-}
-
databricks = {
# 0.1.11 appears to have authentication issues with azure databricks
"databricks-sdk>=0.9.0",
diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py
index c909b89eb0c2d..73ac4baac48c0 100644
--- a/metadata-ingestion/src/datahub/configuration/common.py
+++ b/metadata-ingestion/src/datahub/configuration/common.py
@@ -283,7 +283,7 @@ class VersionedConfig(ConfigModel):
class LineageConfig(ConfigModel):
incremental_lineage: bool = Field(
- default=True,
+ default=False,
description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.",
)
diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py
index dedcfa0385f75..cedaa4fbbd7f6 100644
--- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py
@@ -106,6 +106,7 @@ def process_sql_parsing_result(
user: Optional[UserUrn] = None,
custom_operation_type: Optional[str] = None,
include_urns: Optional[Set[DatasetUrn]] = None,
+ include_column_lineage: bool = True,
) -> Iterable[MetadataWorkUnit]:
"""Process a single query and yield any generated workunits.
@@ -130,7 +131,9 @@ def process_sql_parsing_result(
_merge_lineage_data(
downstream_urn=downstream_urn,
upstream_urns=result.in_tables,
- column_lineage=result.column_lineage,
+ column_lineage=result.column_lineage
+ if include_column_lineage
+ else None,
upstream_edges=self._lineage_map[downstream_urn],
query_timestamp=query_timestamp,
is_view_ddl=is_view_ddl,
diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
index 9478c5cf7efa2..945b201ca5758 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py
@@ -130,10 +130,13 @@ def auto_incremental_lineage(
if len(wu.metadata.proposedSnapshot.aspects) > 0:
yield wu
- yield _lineage_wu_via_read_modify_write(
- graph, urn, lineage_aspect, wu.metadata.systemMetadata
- ) if lineage_aspect.fineGrainedLineages else _convert_upstream_lineage_to_patch(
- urn, lineage_aspect, wu.metadata.systemMetadata
- )
+ if lineage_aspect.fineGrainedLineages:
+ yield _lineage_wu_via_read_modify_write(
+ graph, urn, lineage_aspect, wu.metadata.systemMetadata
+ )
+ elif lineage_aspect.upstreams:
+ yield _convert_upstream_lineage_to_patch(
+ urn, lineage_aspect, wu.metadata.systemMetadata
+ )
else:
yield wu
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py
index b86844b1c4c83..8940642f7008a 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source.py
@@ -215,6 +215,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
)
):
auto_lowercase_dataset_urns = auto_lowercase_urns
+
return [
auto_lowercase_dataset_urns,
auto_status_aspect,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index 48d2118a9b091..c4de24bf192f1 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -280,6 +280,11 @@ class DBTCommonConfig(
default=False,
description="When enabled, dbt test warnings will be treated as failures.",
)
+ # override default value to True.
+ incremental_lineage: bool = Field(
+ default=True,
+ description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.",
+ )
@validator("target_platform")
def validate_target_platform_value(cls, target_platform: str) -> str:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
index 0a15c352fc842..9649054dbe6cb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
@@ -136,7 +136,6 @@ def get_workunits(
return
self._populate_external_lineage_map(discovered_tables)
-
if self.config.include_view_lineage:
if len(discovered_views) > 0:
yield from self.get_view_upstream_workunits(
@@ -200,14 +199,15 @@ def _gen_workunit_from_sql_parsing_result(
self,
dataset_identifier: str,
result: SqlParsingResult,
- ) -> MetadataWorkUnit:
+ ) -> Iterable[MetadataWorkUnit]:
upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result(
self.dataset_urn_builder(dataset_identifier), result
)
- self.report.num_views_with_upstreams += 1
- return self._create_upstream_lineage_workunit(
- dataset_identifier, upstreams, fine_upstreams
- )
+ if upstreams:
+ self.report.num_views_with_upstreams += 1
+ yield self._create_upstream_lineage_workunit(
+ dataset_identifier, upstreams, fine_upstreams
+ )
def _gen_workunits_from_query_result(
self,
@@ -251,7 +251,7 @@ def get_view_upstream_workunits(
)
if result:
views_processed.add(view_identifier)
- yield self._gen_workunit_from_sql_parsing_result(
+ yield from self._gen_workunit_from_sql_parsing_result(
view_identifier, result
)
self.report.view_lineage_parse_secs = timer.elapsed_seconds()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
index 63b21bc82eddd..d081acb6c1eff 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
@@ -1,15 +1,18 @@
import json
import logging
import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Union
from pydantic.class_validators import validator
from pydantic.fields import Field
# This import verifies that the dependencies are available.
from pyhive import hive # noqa: F401
-from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveTimestamp
+from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
+from sqlalchemy.engine.reflection import Inspector
+from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.decorators import (
SourceCapability,
SupportStatus,
@@ -18,8 +21,10 @@
platform_name,
support_status,
)
+from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.extractor import schema_util
-from datahub.ingestion.source.sql.sql_common import register_custom_type
+from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type
+from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
from datahub.ingestion.source.sql.two_tier_sql_source import (
TwoTierSQLAlchemyConfig,
TwoTierSQLAlchemySource,
@@ -31,6 +36,7 @@
SchemaField,
TimeTypeClass,
)
+from datahub.metadata.schema_classes import ViewPropertiesClass
from datahub.utilities import config_clean
from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column
@@ -90,19 +96,34 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw):
logger.warning(f"Failed to patch method due to {e}")
+@reflection.cache # type: ignore
+def get_view_names_patched(self, connection, schema=None, **kw):
+ query = "SHOW VIEWS"
+ if schema:
+ query += " IN " + self.identifier_preparer.quote_identifier(schema)
+ return [row[0] for row in connection.execute(query)]
+
+
+@reflection.cache # type: ignore
+def get_view_definition_patched(self, connection, view_name, schema=None, **kw):
+ full_table = self.identifier_preparer.quote_identifier(view_name)
+ if schema:
+ full_table = "{}.{}".format(
+ self.identifier_preparer.quote_identifier(schema),
+ self.identifier_preparer.quote_identifier(view_name),
+ )
+ row = connection.execute("SHOW CREATE TABLE {}".format(full_table)).fetchone()
+ return row[0]
+
+
+HiveDialect.get_view_names = get_view_names_patched
+HiveDialect.get_view_definition = get_view_definition_patched
+
+
class HiveConfig(TwoTierSQLAlchemyConfig):
# defaults
scheme = Field(default="hive", hidden_from_docs=True)
- # Hive SQLAlchemy connector returns views as tables.
- # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273.
- # Disabling views helps us prevent this duplication.
- include_views = Field(
- default=False,
- hidden_from_docs=True,
- description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.",
- )
-
@validator("host_port")
def clean_host_port(cls, v):
return config_clean.remove_protocol(v)
@@ -174,3 +195,41 @@ def get_schema_fields_for_column(
return new_fields
return fields
+
+ # Hive SQLAlchemy connector returns views as tables in get_table_names.
+ # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273.
+ # This override makes sure that we ingest view definitions for views
+ def _process_view(
+ self,
+ dataset_name: str,
+ inspector: Inspector,
+ schema: str,
+ view: str,
+ sql_config: SQLCommonConfig,
+ ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]:
+ dataset_urn = make_dataset_urn_with_platform_instance(
+ self.platform,
+ dataset_name,
+ self.config.platform_instance,
+ self.config.env,
+ )
+
+ try:
+ view_definition = inspector.get_view_definition(view, schema)
+ if view_definition is None:
+ view_definition = ""
+ else:
+ # Some dialects return a TextClause instead of a raw string,
+ # so we need to convert them to a string.
+ view_definition = str(view_definition)
+ except NotImplementedError:
+ view_definition = ""
+
+ if view_definition:
+ view_properties_aspect = ViewPropertiesClass(
+ materialized=False, viewLanguage="SQL", viewLogic=view_definition
+ )
+ yield MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=view_properties_aspect,
+ ).as_workunit()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
index a6a9d8e2c8597..4f133c6459a0f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
@@ -103,10 +103,6 @@ class BasePostgresConfig(BasicSQLAlchemyConfig):
class PostgresConfig(BasePostgresConfig):
- include_view_lineage = Field(
- default=False, description="Include table lineage for views"
- )
-
database_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description=(
@@ -183,9 +179,10 @@ def get_inspectors(self) -> Iterable[Inspector]:
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
yield from super().get_workunits_internal()
- for inspector in self.get_inspectors():
- if self.config.include_view_lineage:
- yield from self._get_view_lineage_workunits(inspector)
+ if self.views_failed_parsing:
+ for inspector in self.get_inspectors():
+ if self.config.include_view_lineage:
+ yield from self._get_view_lineage_workunits(inspector)
def _get_view_lineage_elements(
self, inspector: Inspector
@@ -245,11 +242,14 @@ def _get_view_lineage_workunits(
dependent_view, dependent_schema = key
# Construct a lineage object.
+ view_identifier = self.get_identifier(
+ schema=dependent_schema, entity=dependent_view, inspector=inspector
+ )
+ if view_identifier not in self.views_failed_parsing:
+ return
urn = mce_builder.make_dataset_urn_with_platform_instance(
platform=self.platform,
- name=self.get_identifier(
- schema=dependent_schema, entity=dependent_view, inspector=inspector
- ),
+ name=view_identifier,
platform_instance=self.config.platform_instance,
env=self.config.env,
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index fad9b9e8018a5..51909eaf4ed55 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -2,12 +2,14 @@
import logging
import traceback
from dataclasses import dataclass, field
+from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Dict,
Iterable,
List,
+ MutableMapping,
Optional,
Set,
Tuple,
@@ -29,7 +31,9 @@
make_tag_urn,
)
from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import (
@@ -86,9 +90,16 @@
ViewPropertiesClass,
)
from datahub.telemetry import telemetry
+from datahub.utilities.file_backed_collections import FileBackedDict
from datahub.utilities.lossy_collections import LossyList
from datahub.utilities.registries.domain_registry import DomainRegistry
from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport
+from datahub.utilities.sqlglot_lineage import (
+ SchemaResolver,
+ SqlParsingResult,
+ sqlglot_lineage,
+ view_definition_lineage_helper,
+)
if TYPE_CHECKING:
from datahub.ingestion.source.ge_data_profiler import (
@@ -110,6 +121,11 @@ class SQLSourceReport(StaleEntityRemovalSourceReport):
query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None
+ num_view_definitions_parsed: int = 0
+ num_view_definitions_failed_parsing: int = 0
+ num_view_definitions_failed_column_parsing: int = 0
+ view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList)
+
def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
"""
Entity could be a view or a table
@@ -319,6 +335,18 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str)
cached_domains=[k for k in self.config.domain], graph=self.ctx.graph
)
+ self.views_failed_parsing: Set[str] = set()
+ self.schema_resolver: SchemaResolver = SchemaResolver(
+ platform=self.platform,
+ platform_instance=self.config.platform_instance,
+ env=self.config.env,
+ )
+ self._view_definition_cache: MutableMapping[str, str]
+ if self.config.use_file_backed_cache:
+ self._view_definition_cache = FileBackedDict[str]()
+ else:
+ self._view_definition_cache = {}
+
def warn(self, log: logging.Logger, key: str, reason: str) -> None:
self.report.report_warning(key, reason[:100])
log.warning(f"{key} => {reason}")
@@ -455,6 +483,11 @@ def get_schema_level_workunits(
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
return [
*super().get_workunit_processors(),
+ partial(
+ auto_incremental_lineage,
+ self.ctx.graph,
+ self.config.incremental_lineage,
+ ),
StaleEntityRemovalHandler.create(
self, self.config, self.ctx
).workunit_processor,
@@ -512,6 +545,35 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit
profile_requests, profiler, platform=self.platform
)
+ if self.config.include_view_lineage:
+ yield from self.get_view_lineage()
+
+ def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
+ builder = SqlParsingBuilder(
+ generate_lineage=True,
+ generate_usage_statistics=False,
+ generate_operations=False,
+ )
+ for dataset_name in self._view_definition_cache.keys():
+ view_definition = self._view_definition_cache[dataset_name]
+ result = self._run_sql_parser(
+ dataset_name,
+ view_definition,
+ self.schema_resolver,
+ )
+ if result and result.out_tables:
+ # process_sql_parsing_result is a generator that yields no workunits here;
+ # we still need `yield from` so that its body actually executes.
+ yield from builder.process_sql_parsing_result(
+ result=result,
+ query=view_definition,
+ is_view_ddl=True,
+ include_column_lineage=self.config.include_view_column_lineage,
+ )
+ else:
+ self.views_failed_parsing.add(dataset_name)
+ yield from builder.gen_workunits()
+
def get_identifier(
self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any
) -> str:
@@ -658,6 +720,8 @@ def _process_table(
schema_fields,
)
dataset_snapshot.aspects.append(schema_metadata)
+ if self.config.include_view_lineage:
+ self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata)
db_name = self.get_db_name(inspector)
yield from self.add_table_to_schema_container(
@@ -862,6 +926,12 @@ def _process_view(
view: str,
sql_config: SQLCommonConfig,
) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]:
+ dataset_urn = make_dataset_urn_with_platform_instance(
+ self.platform,
+ dataset_name,
+ self.config.platform_instance,
+ self.config.env,
+ )
try:
columns = inspector.get_columns(view, schema)
except KeyError:
@@ -877,6 +947,8 @@ def _process_view(
columns,
canonical_schema=schema_fields,
)
+ if self.config.include_view_lineage:
+ self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata)
description, properties, _ = self.get_table_properties(inspector, schema, view)
try:
view_definition = inspector.get_view_definition(view, schema)
@@ -890,12 +962,9 @@ def _process_view(
view_definition = ""
properties["view_definition"] = view_definition
properties["is_view"] = "True"
- dataset_urn = make_dataset_urn_with_platform_instance(
- self.platform,
- dataset_name,
- self.config.platform_instance,
- self.config.env,
- )
+ if view_definition and self.config.include_view_lineage:
+ self._view_definition_cache[dataset_name] = view_definition
+
dataset_snapshot = DatasetSnapshot(
urn=dataset_urn,
aspects=[StatusClass(removed=False)],
@@ -942,6 +1011,51 @@ def _process_view(
domain_registry=self.domain_registry,
)
+ def _run_sql_parser(
+ self, view_identifier: str, query: str, schema_resolver: SchemaResolver
+ ) -> Optional[SqlParsingResult]:
+ try:
+ database, schema = self.get_db_schema(view_identifier)
+ except ValueError:
+ logger.warning(f"Invalid view identifier: {view_identifier}")
+ return None
+ raw_lineage = sqlglot_lineage(
+ query,
+ schema_resolver=schema_resolver,
+ default_db=database,
+ default_schema=schema,
+ )
+ view_urn = make_dataset_urn_with_platform_instance(
+ self.platform,
+ view_identifier,
+ self.config.platform_instance,
+ self.config.env,
+ )
+
+ if raw_lineage.debug_info.table_error:
+ logger.debug(
+ f"Failed to parse lineage for view {view_identifier}: "
+ f"{raw_lineage.debug_info.table_error}"
+ )
+ self.report.num_view_definitions_failed_parsing += 1
+ self.report.view_definitions_parsing_failures.append(
+ f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}"
+ )
+ return None
+
+ elif raw_lineage.debug_info.column_error:
+ self.report.num_view_definitions_failed_column_parsing += 1
+ self.report.view_definitions_parsing_failures.append(
+ f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}"
+ )
+ else:
+ self.report.num_view_definitions_parsed += 1
+ return view_definition_lineage_helper(raw_lineage, view_urn)
+
+ def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
+ database, schema, _view = dataset_identifier.split(".")
+ return database, schema
+
def get_profiler_instance(self, inspector: Inspector) -> "DatahubGEProfiler":
from datahub.ingestion.source.ge_data_profiler import DatahubGEProfiler
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py
index 57aae32b361cf..095b8e6443171 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py
@@ -6,7 +6,7 @@
from pydantic import Field
from sqlalchemy.engine import URL
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel, LineageConfig
from datahub.configuration.source_common import (
DatasetSourceConfigMixin,
LowerCaseDatasetUrnConfigMixin,
@@ -28,6 +28,7 @@ class SQLCommonConfig(
StatefulIngestionConfigBase,
DatasetSourceConfigMixin,
LowerCaseDatasetUrnConfigMixin,
+ LineageConfig,
):
options: dict = pydantic.Field(
default_factory=dict,
@@ -70,6 +71,22 @@ class SQLCommonConfig(
description="If the source supports it, include table lineage to the underlying storage location.",
)
+ include_view_lineage: bool = Field(
+ default=True,
+ description="Populates view->view and table->view lineage using DataHub's sql parser.",
+ )
+
+ include_view_column_lineage: bool = Field(
+ default=True,
+ description="Populates column-level lineage for view->view and table->view lineage using DataHub's sql parser."
+ " Requires `include_view_lineage` to be enabled.",
+ )
+
+ use_file_backed_cache: bool = Field(
+ default=True,
+ description="Whether to use a file backed cache for the view definitions.",
+ )
+
profiling: GEProfilingConfig = GEProfilingConfig()
# Custom Stateful Ingestion settings
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
index e628e4dbd3446..899a7b6697c0a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from datetime import datetime
-from typing import Iterable, MutableMapping, Optional, Union
+from typing import Iterable, Optional, Union
# This import verifies that the dependencies are available.
import teradatasqlalchemy # noqa: F401
@@ -33,14 +33,11 @@
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
-from datahub.metadata._schema_classes import SchemaMetadataClass, ViewPropertiesClass
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
BytesTypeClass,
TimeTypeClass,
)
-from datahub.utilities.file_backed_collections import FileBackedDict
from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage
-from datahub.utilities.urns.dataset_urn import DatasetUrn
logger: logging.Logger = logging.getLogger(__name__)
@@ -87,11 +84,6 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
"This requires to have the table lineage feature enabled.",
)
- include_view_lineage = Field(
- default=True,
- description="Whether to include view lineage in the ingestion. "
- "This requires to have the view lineage feature enabled.",
- )
usage: BaseUsageConfig = Field(
description="The usage config to use when generating usage statistics",
default=BaseUsageConfig(),
@@ -107,11 +99,6 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
description="Generate usage statistic.",
)
- use_file_backed_cache: bool = Field(
- default=True,
- description="Whether to use a file backed cache for the view definitions.",
- )
-
@platform_name("Teradata")
@config_class(TeradataConfig)
@@ -143,8 +130,6 @@ class TeradataSource(TwoTierSQLAlchemySource):
and "timestamp" < TIMESTAMP '{end_time}'
"""
- _view_definition_cache: MutableMapping[str, str]
-
def __init__(self, config: TeradataConfig, ctx: PipelineContext):
super().__init__(config, ctx, "teradata")
@@ -167,34 +152,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext):
env=self.config.env,
)
- if self.config.use_file_backed_cache:
- self._view_definition_cache = FileBackedDict[str]()
- else:
- self._view_definition_cache = {}
-
@classmethod
def create(cls, config_dict, ctx):
config = TeradataConfig.parse_obj(config_dict)
return cls(config, ctx)
- def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
- for key in self._view_definition_cache.keys():
- view_definition = self._view_definition_cache[key]
- dataset_urn = DatasetUrn.create_from_string(key)
-
- db_name: Optional[str] = None
- # We need to get the default db from the dataset urn otherwise the builder generates the wrong urns
- if "." in dataset_urn.get_dataset_name():
- db_name = dataset_urn.get_dataset_name().split(".", 1)[0]
-
- self.report.num_view_ddl_parsed += 1
- if self.report.num_view_ddl_parsed % 1000 == 0:
- logger.info(f"Parsed {self.report.num_queries_parsed} view ddl")
-
- yield from self.gen_lineage_from_query(
- query=view_definition, default_database=db_name, is_view_ddl=True
- )
-
def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]:
engine = self.get_metadata_engine()
for entry in engine.execute(
@@ -252,19 +214,7 @@ def get_metadata_engine(self) -> Engine:
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
# Add all schemas to the schema resolver
- for wu in super().get_workunits_internal():
- urn = wu.get_urn()
- schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass)
- if schema_metadata:
- self.schema_resolver.add_schema_metadata(urn, schema_metadata)
- view_properties = wu.get_aspect_of_type(ViewPropertiesClass)
- if view_properties and self.config.include_view_lineage:
- self._view_definition_cache[urn] = view_properties.viewLogic
- yield wu
-
- if self.config.include_view_lineage:
- self.report.report_ingestion_stage_start("view lineage extraction")
- yield from self.get_view_lineage()
+ yield from super().get_workunits_internal()
if self.config.include_table_lineage or self.config.include_usage_statistics:
self.report.report_ingestion_stage_start("audit log extraction")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py
index 7a49551dc1235..efb1d3ffe119f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py
@@ -1,6 +1,6 @@
import typing
import urllib.parse
-from typing import Any, Dict, Iterable, Optional
+from typing import Any, Dict, Iterable, Optional, Tuple
from pydantic.fields import Field
from sqlalchemy import create_engine, inspect
@@ -71,6 +71,10 @@ def __init__(self, config, ctx, platform):
super().__init__(config, ctx, platform)
self.config: TwoTierSQLAlchemyConfig = config
+ def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]:
+ schema, _view = dataset_identifier.split(".", 1)
+ return None, schema
+
def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey:
# Because our overridden get_allowed_schemas method returns db_name as the schema name,
# the db_name and schema here will be the same. Hence, we just ignore the schema parameter.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py
index a417cae2b1ab0..b89db755853bc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py
@@ -86,7 +86,7 @@ class VerticaConfig(BasicSQLAlchemyConfig):
default=True, description="Whether Models should be ingested."
)
- include_view_lineage: Optional[bool] = pydantic.Field(
+ include_view_lineage: bool = pydantic.Field(
default=True,
description="If the source supports it, include view lineage to the underlying storage location.",
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
index be97e9380f1f5..7fb2cf9813cab 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
@@ -11,7 +11,6 @@
ConfigModel,
ConfigurationError,
DynamicTypedConfig,
- LineageConfig,
)
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_rename import pydantic_renamed_field
@@ -100,7 +99,7 @@ class StatefulIngestionConfigBase(GenericModel, Generic[CustomConfig]):
)
-class StatefulLineageConfigMixin(LineageConfig):
+class StatefulLineageConfigMixin:
enable_stateful_lineage_ingestion: bool = Field(
default=True,
description="Enable stateful lineage ingestion."
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
index 0d72fc52da0ca..c3e8c175f1de5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
@@ -166,13 +166,17 @@ def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
"but should be set when using use_certificate false for oauth_config"
)
- @pydantic.validator("include_view_lineage")
- def validate_include_view_lineage(cls, v, values):
- if not values.get("include_table_lineage") and v:
+ @pydantic.root_validator()
+ def validate_include_view_lineage(cls, values):
+ if (
+ "include_table_lineage" in values
+ and not values.get("include_table_lineage")
+ and values.get("include_view_lineage")
+ ):
raise ValueError(
"include_table_lineage must be True for include_view_lineage to be set."
)
- return v
+ return values
def get_sql_alchemy_url(
self,
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index 526d90b2a1bfa..1d74b20569814 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -623,9 +623,9 @@ def _schema_aware_fuzzy_column_resolve(
statement = sqlglot.optimizer.annotate_types.annotate_types(
statement, schema=sqlglot_db_schema
)
- except sqlglot.errors.OptimizeError as e:
+ except (sqlglot.errors.OptimizeError, sqlglot.errors.ParseError) as e:
# This is not a fatal error, so we can continue.
- logger.debug("sqlglot failed to annotate types: %s", e)
+ logger.debug("sqlglot failed to annotate or parse types: %s", e)
try:
assert isinstance(statement, _SupportedColumnLineageTypesTuple)
@@ -1156,3 +1156,20 @@ def create_lineage_sql_parsed_result(
finally:
if needs_close:
schema_resolver.close()
+
+
+def view_definition_lineage_helper(
+ result: SqlParsingResult, view_urn: str
+) -> SqlParsingResult:
+ if result.query_type is QueryType.SELECT:
+ # Some platforms (e.g. postgres) store only the <select statement> from the view definition
+ # `create view V as <select statement>`. For such view definitions, `result.out_tables` and
+ # `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream
+ # details and downstream column details are extracted correctly.
+ # Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream`
+ # to get complete lineage result.
+ result.out_tables = [view_urn]
+ if result.column_lineage:
+ for col_result in result.column_lineage:
+ col_result.downstream.table = view_urn
+ return result
diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json
index f3b6d2b8138cc..6774d4c7055b9 100644
--- a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json
+++ b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -93,7 +98,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -111,7 +117,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore",
@@ -121,7 +127,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578706",
+ "Table Parameters: transient_lastDdlTime": "1697721972",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -187,7 +193,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -204,7 +211,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -224,7 +232,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -239,7 +248,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -257,17 +267,19 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test",
"Table Type:": "MANAGED_TABLE",
"Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}",
+ "Table Parameters: another.comment": "This table has no partitions",
+ "Table Parameters: comment": "This table has array of structs",
"Table Parameters: numFiles": "1",
"Table Parameters: numRows": "1",
"Table Parameters: rawDataSize": "32",
"Table Parameters: totalSize": "33",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721976",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -278,6 +290,7 @@
"Storage Desc Params: serialization.format": "1"
},
"name": "array_struct_test",
+ "description": "This table has array of structs",
"tags": []
}
},
@@ -304,6 +317,7 @@
{
"fieldPath": "property_id",
"nullable": true,
+ "description": "id of property",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
@@ -316,6 +330,7 @@
{
"fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
"nullable": true,
+ "description": "service types and providers",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.ArrayType": {
@@ -368,7 +383,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -385,7 +401,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -405,7 +422,189 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "Database:": "db1",
+ "Owner:": "root",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
+ "LastAccessTime:": "UNKNOWN",
+ "Retention:": "0",
+ "Table Type:": "VIRTUAL_VIEW",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "null",
+ "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
+ "Compressed:": "No",
+ "Num Buckets:": "-1",
+ "Bucket Columns:": "[]",
+ "Sort Columns:": "[]",
+ "View Original Text:": "select * from db1.array_struct_test",
+ "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`",
+ "View Rewrite Enabled:": "No"
+ },
+ "name": "array_struct_test_view",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "db1.array_struct_test_view",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "property_id",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "int",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "record"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array<struct<type:string,provider:array<int>>>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array<struct<type:string,provider:array<int>>>\"}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "int"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array<int>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array<int>\"}"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:ded36d15fcfbbb939830549697122661",
+ "urn": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -420,7 +619,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -438,7 +638,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test",
@@ -448,7 +648,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -518,7 +718,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -535,7 +736,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -555,7 +757,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -570,7 +773,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -588,7 +792,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test",
@@ -598,7 +802,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -717,7 +921,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -734,7 +939,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -754,7 +960,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -769,7 +976,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -787,16 +995,17 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes",
"Table Type:": "MANAGED_TABLE",
"Table Parameters: numFiles": "1",
+ "Table Parameters: numPartitions": "1",
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "5812",
- "Table Parameters: transient_lastDdlTime": "1688578704",
+ "Table Parameters: transient_lastDdlTime": "1697721968",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -853,6 +1062,18 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
+ },
+ {
+ "fieldPath": "baz",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
}
]
}
@@ -862,7 +1083,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -879,7 +1101,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +1122,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +1138,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -932,7 +1157,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test",
@@ -942,7 +1167,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578706",
+ "Table Parameters: transient_lastDdlTime": "1697721972",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -1039,7 +1264,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1056,7 +1282,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1076,7 +1303,188 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "Database:": "db1",
+ "Owner:": "root",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
+ "LastAccessTime:": "UNKNOWN",
+ "Retention:": "0",
+ "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized",
+ "Table Type:": "MATERIALIZED_VIEW",
+ "Table Parameters: numFiles": "0",
+ "Table Parameters: totalSize": "0",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
+ "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
+ "Compressed:": "No",
+ "Num Buckets:": "-1",
+ "Bucket Columns:": "[]",
+ "Sort Columns:": "[]",
+ "View Original Text:": "select * from db1.struct_test",
+ "View Expanded Text:": "null",
+ "View Rewrite Enabled:": "No"
+ },
+ "name": "struct_test_view_materialized",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "db1.struct_test_view_materialized",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "property_id",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "int",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "struct<type:string,provider:array<int>>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"struct<type:string,provider:array<int>>\"}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "int"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array<int>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array<int>\"}"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:ded36d15fcfbbb939830549697122661",
+ "urn": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1091,7 +1499,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1109,7 +1518,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test",
@@ -1119,10 +1528,10 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
- "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
- "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
- "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
+ "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
"Compressed:": "No",
"Num Buckets:": "-1",
"Bucket Columns:": "[]",
@@ -1285,7 +1694,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1302,7 +1712,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1322,7 +1733,26 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1342,7 +1772,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1357,7 +1788,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1372,7 +1804,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1389,7 +1822,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1404,7 +1838,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1419,7 +1854,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1437,7 +1873,7 @@
"customProperties": {
"Database:": "db2",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:24 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:10 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db2.db/pokes",
@@ -1446,7 +1882,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "5812",
- "Table Parameters: transient_lastDdlTime": "1688578706",
+ "Table Parameters: transient_lastDdlTime": "1697721971",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -1454,10 +1890,7 @@
"Num Buckets:": "-1",
"Bucket Columns:": "[]",
"Sort Columns:": "[]",
- "Storage Desc Params: serialization.format": "1",
- "Table:": "db2.pokes",
- "Constraint Name:": "pk_1173723383_1683022998392_0",
- "Column Names:": "foo"
+ "Storage Desc Params: serialization.format": "1"
},
"name": "pokes",
"tags": []
@@ -1515,7 +1948,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1532,7 +1966,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1552,7 +1987,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1572,7 +2008,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1587,7 +2024,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1602,7 +2040,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1619,7 +2058,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1634,7 +2074,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
index 08f281f398909..e93924049f626 100644
--- a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
+++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -93,7 +98,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -111,7 +117,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore",
@@ -121,7 +127,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578706",
+ "Table Parameters: transient_lastDdlTime": "1697721972",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -187,7 +193,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -204,7 +211,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -224,7 +232,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -239,7 +248,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -257,17 +267,19 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test",
"Table Type:": "MANAGED_TABLE",
"Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}",
+ "Table Parameters: another.comment": "This table has no partitions",
+ "Table Parameters: comment": "This table has array of structs",
"Table Parameters: numFiles": "1",
"Table Parameters: numRows": "1",
"Table Parameters: rawDataSize": "32",
"Table Parameters: totalSize": "33",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721976",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -278,6 +290,7 @@
"Storage Desc Params: serialization.format": "1"
},
"name": "array_struct_test",
+ "description": "This table has array of structs",
"tags": []
}
},
@@ -304,6 +317,7 @@
{
"fieldPath": "property_id",
"nullable": true,
+ "description": "id of property",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
@@ -316,6 +330,7 @@
{
"fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
"nullable": true,
+ "description": "service types and providers",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.ArrayType": {
@@ -368,7 +383,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -385,7 +401,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -405,7 +422,189 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "Database:": "db1",
+ "Owner:": "root",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
+ "LastAccessTime:": "UNKNOWN",
+ "Retention:": "0",
+ "Table Type:": "VIRTUAL_VIEW",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "null",
+ "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
+ "Compressed:": "No",
+ "Num Buckets:": "-1",
+ "Bucket Columns:": "[]",
+ "Sort Columns:": "[]",
+ "View Original Text:": "select * from db1.array_struct_test",
+ "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`",
+ "View Rewrite Enabled:": "No"
+ },
+ "name": "array_struct_test_view",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "db1.array_struct_test_view",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "property_id",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "int",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "record"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array>>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array>>\"}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "int"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array\"}"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:ded36d15fcfbbb939830549697122661",
+ "urn": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -420,7 +619,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -438,7 +638,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test",
@@ -448,7 +648,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -518,7 +718,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -535,7 +736,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -555,7 +757,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -570,7 +773,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -588,7 +792,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test",
@@ -598,7 +802,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -717,7 +921,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -734,7 +939,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -754,7 +960,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -769,7 +976,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -787,16 +995,17 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes",
"Table Type:": "MANAGED_TABLE",
"Table Parameters: numFiles": "1",
+ "Table Parameters: numPartitions": "1",
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "5812",
- "Table Parameters: transient_lastDdlTime": "1688578704",
+ "Table Parameters: transient_lastDdlTime": "1697721968",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -853,6 +1062,18 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false
+ },
+ {
+ "fieldPath": "baz",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
}
]
}
@@ -862,7 +1083,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -879,7 +1101,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +1122,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +1138,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -932,7 +1157,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test",
@@ -942,7 +1167,7 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578706",
+ "Table Parameters: transient_lastDdlTime": "1697721972",
"SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
"OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
@@ -1039,7 +1264,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1056,7 +1282,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1076,7 +1303,188 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "Database:": "db1",
+ "Owner:": "root",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
+ "LastAccessTime:": "UNKNOWN",
+ "Retention:": "0",
+ "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized",
+ "Table Type:": "MATERIALIZED_VIEW",
+ "Table Parameters: numFiles": "0",
+ "Table Parameters: totalSize": "0",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
+ "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
+ "Compressed:": "No",
+ "Num Buckets:": "-1",
+ "Bucket Columns:": "[]",
+ "Sort Columns:": "[]",
+ "View Original Text:": "select * from db1.struct_test",
+ "View Expanded Text:": "null",
+ "View Rewrite Enabled:": "No"
+ },
+ "name": "struct_test_view_materialized",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "db1.struct_test_view_materialized",
+ "platform": "urn:li:dataPlatform:hive",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "property_id",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "int",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "struct>",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"struct>\"}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
+ },
+ {
+ "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {
+ "nestedType": [
+ "int"
+ ]
+ }
+ }
+ },
+ "nativeDataType": "array",
+ "recursive": false,
+ "isPartOfKey": false,
+ "jsonProps": "{\"native_data_type\": \"array\"}"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:ded36d15fcfbbb939830549697122661",
+ "urn": "urn:li:container:ded36d15fcfbbb939830549697122661"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1091,7 +1499,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1109,7 +1518,7 @@
"customProperties": {
"Database:": "db1",
"Owner:": "root",
- "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023",
+ "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023",
"LastAccessTime:": "UNKNOWN",
"Retention:": "0",
"Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test",
@@ -1119,10 +1528,10 @@
"Table Parameters: numRows": "0",
"Table Parameters: rawDataSize": "0",
"Table Parameters: totalSize": "0",
- "Table Parameters: transient_lastDdlTime": "1688578710",
- "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
- "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat",
- "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
+ "Table Parameters: transient_lastDdlTime": "1697721978",
+ "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
+ "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
+ "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
"Compressed:": "No",
"Num Buckets:": "-1",
"Bucket Columns:": "[]",
@@ -1285,7 +1694,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1302,7 +1712,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1322,7 +1733,26 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "hive-test"
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "hive-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/hive/hive_setup.sql b/metadata-ingestion/tests/integration/hive/hive_setup.sql
index 8fb8498894bc0..323a78e24d10b 100644
--- a/metadata-ingestion/tests/integration/hive/hive_setup.sql
+++ b/metadata-ingestion/tests/integration/hive/hive_setup.sql
@@ -1,10 +1,10 @@
CREATE DATABASE IF NOT EXISTS db1;
CREATE DATABASE IF NOT EXISTS db2;
-- Setup a "pokes" example table.
-CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING);
-LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes;
+CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING) PARTITIONED BY (baz STRING);
+LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes PARTITION (baz='dummy');
-CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING, CONSTRAINT pk_1173723383_1683022998392_0 primary key(foo) DISABLE NOVALIDATE NORELY);
+CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING);
LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db2.pokes;
-- Setup a table with a special character.
@@ -23,12 +23,12 @@ CREATE TABLE IF NOT EXISTS db1.struct_test
CREATE TABLE IF NOT EXISTS db1.array_struct_test
(
- property_id INT,
+ property_id INT COMMENT 'id of property',
service array
- >>
-);
+ >> COMMENT 'service types and providers'
+) TBLPROPERTIES ('comment' = 'This table has array of structs', 'another.comment' = 'This table has no partitions');;
WITH
test_data as (
@@ -39,6 +39,9 @@ test_data as (
INSERT INTO TABLE db1.array_struct_test
select * from test_data;
+CREATE MATERIALIZED VIEW db1.struct_test_view_materialized as select * from db1.struct_test;
+CREATE VIEW db1.array_struct_test_view as select * from db1.array_struct_test;
+
CREATE TABLE IF NOT EXISTS db1.nested_struct_test
(
property_id INT,
@@ -50,9 +53,6 @@ CREATE TABLE IF NOT EXISTS db1.nested_struct_test
CREATE TABLE db1.union_test(
foo UNIONTYPE, struct, struct>
-);
+) STORED AS ORC ;
-CREATE TABLE db1.map_test(
- KeyValue String,
- RecordId map
-);
\ No newline at end of file
+CREATE TABLE db1.map_test(KeyValue String, RecordId map);
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json
index 4aaefb48d33e1..38b03ce238d1c 100644
--- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json
+++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -93,7 +98,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -213,7 +219,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -230,7 +237,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -250,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -265,7 +274,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -361,7 +371,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -378,7 +389,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -398,7 +410,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +567,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -969,7 +983,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -989,7 +1004,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1004,7 +1020,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1019,7 +1036,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1036,7 +1054,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1053,7 +1072,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1068,7 +1088,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1083,7 +1104,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1215,7 +1237,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1232,7 +1255,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1249,7 +1273,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1269,7 +1294,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1284,7 +1310,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1418,7 +1445,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1435,7 +1463,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1452,7 +1481,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1472,7 +1502,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1487,7 +1518,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1586,7 +1618,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1603,7 +1636,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1620,7 +1654,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1637,7 +1672,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1657,7 +1693,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1677,7 +1714,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1692,7 +1730,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1707,7 +1746,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1724,7 +1764,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1739,7 +1780,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1754,7 +1796,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1874,7 +1917,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1891,7 +1935,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1911,7 +1956,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1926,7 +1972,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2022,7 +2069,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2039,7 +2087,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2059,7 +2108,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2182,7 +2232,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2233,7 +2284,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2253,7 +2305,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2268,7 +2321,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2283,7 +2337,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2300,7 +2355,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2315,7 +2371,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2330,7 +2387,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2390,7 +2448,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2407,7 +2466,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2427,7 +2487,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2442,7 +2503,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2502,7 +2564,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2519,7 +2582,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2539,7 +2603,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2568,7 +2633,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2597,7 +2663,79 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
- "runId": "mysql-test"
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "upstreamLineage",
+ "aspect": {
+ "json": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1586847600000,
+ "runId": "mysql-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
index 535ce964c6058..b9b2a3b2141a8 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
+++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -99,7 +104,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -114,7 +120,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -129,7 +136,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -146,7 +154,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -161,7 +170,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -181,7 +191,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -201,7 +212,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -216,7 +228,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -231,7 +244,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -248,7 +262,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -263,7 +278,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -284,7 +300,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -299,7 +316,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -314,7 +332,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -331,7 +350,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -346,7 +366,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -366,7 +387,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -381,7 +403,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -537,7 +560,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +578,186 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
+ "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
+ },
+ {
+ "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
+ "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);",
+ "is_view": "True"
+ },
+ "name": "metadata_aspect_view",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "postgrestest.public.metadata_aspect_view",
+ "platform": "urn:li:dataPlatform:postgres",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "urn",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(length=500)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "aspect",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(length=200)",
+ "recursive": false,
+ "isPartOfKey": false
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
+ "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
+ },
+ {
+ "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
+ "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -634,31 +837,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
- "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
- },
- {
- "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
- "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -675,29 +855,39 @@
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
- "type": "TRANSFORMED"
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)"
+ ],
+ "confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
- "changeType": "UPSERT",
- "aspectName": "status",
- "aspect": {
- "json": {
- "removed": false
- }
- },
- "systemMetadata": {
- "lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml
index b390d9246677e..2bfa39a65363b 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml
+++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml
@@ -25,7 +25,7 @@ source:
include_field_distinct_value_frequencies: false
include_field_histogram: false
catch_exceptions: true
- include_views: false
+ include_views: true
sink:
type: file
config:
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
index bf36a39a8c103..f6fa0a0ed032e 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
+++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -99,7 +104,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -114,7 +120,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -129,7 +136,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -146,7 +154,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -161,7 +170,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -181,7 +191,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -196,7 +207,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -352,7 +364,8 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -369,7 +382,186 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
+ "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
+ },
+ {
+ "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
+ "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "proposedSnapshot": {
+ "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "aspects": [
+ {
+ "com.linkedin.pegasus2avro.common.Status": {
+ "removed": false
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+ "customProperties": {
+ "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);",
+ "is_view": "True"
+ },
+ "name": "metadata_aspect_view",
+ "tags": []
+ }
+ },
+ {
+ "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+ "schemaName": "postgrestest.public.metadata_aspect_view",
+ "platform": "urn:li:dataPlatform:postgres",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.pegasus2avro.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "urn",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(length=500)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "aspect",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(length=200)",
+ "recursive": false,
+ "isPartOfKey": false
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
+ "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
+ },
+ {
+ "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
+ "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1646575200000,
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -391,31 +583,57 @@
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "upstreamLineage",
"aspect": {
"json": {
- "path": [
+ "upstreams": [
{
- "id": "urn:li:container:a6097853edba03be190d99ece4b307ff",
- "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff"
+ "auditStamp": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)"
+ ],
+ "confidenceScore": 1.0
},
{
- "id": "urn:li:container:51904fc8cd5cc729bc630decff284525",
- "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525"
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)"
+ ],
+ "confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1646575200000,
- "runId": "postgres-test"
+ "runId": "postgres-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml
index a489877d52a23..4a2cc543f2d01 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml
+++ b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml
@@ -13,7 +13,7 @@ source:
profile_table_row_count_estimate_only: true
turn_off_expensive_profiling_metrics: true
catch_exceptions: true
- include_views: false
+ include_views: true
sink:
type: file
config:
diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py
index cd53b8f7db4f6..4b0dd2b1045a3 100644
--- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py
+++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py
@@ -287,8 +287,9 @@ def test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warni
SnowflakeV2Config,
cast(PipelineConfig, snowflake_pipeline_config1).source.config,
)
+ config.include_table_lineage = True
config.include_view_lineage = True
- config.incremental_lineage = False
+
pipeline = Pipeline(snowflake_pipeline_config1)
pipeline.run()
pipeline.raise_from_status() # pipeline should not fail
diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
index 19961e48b4a33..c43223c68a6b6 100644
--- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
+++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -99,7 +104,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -114,7 +120,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -129,7 +136,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -146,7 +154,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -161,7 +170,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -181,7 +191,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -196,7 +207,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -219,7 +231,7 @@
"numrows": "1",
"rawdatasize": "32",
"totalsize": "33",
- "transient_lastddltime": "1688422059"
+ "transient_lastddltime": "1698223433"
},
"name": "array_struct_test",
"description": "This table has array of structs",
@@ -315,7 +327,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -332,7 +345,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -356,7 +370,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -371,7 +386,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -392,7 +408,7 @@
"numrows": "0",
"rawdatasize": "0",
"totalsize": "0",
- "transient_lastddltime": "1688422063"
+ "transient_lastddltime": "1698223435"
},
"name": "map_test",
"tags": []
@@ -454,7 +470,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -471,7 +488,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -495,7 +513,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -510,7 +529,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -531,7 +551,7 @@
"numrows": "0",
"rawdatasize": "0",
"totalsize": "0",
- "transient_lastddltime": "1688422062"
+ "transient_lastddltime": "1698223435"
},
"name": "nested_struct_test",
"tags": []
@@ -642,7 +662,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -659,7 +680,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -683,7 +705,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -698,7 +721,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -714,7 +738,7 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
- "transient_lastddltime": "1688421792"
+ "transient_lastddltime": "1698223429"
},
"name": "pokes",
"tags": []
@@ -784,7 +808,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -801,7 +826,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -825,7 +851,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -840,7 +867,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -861,7 +889,7 @@
"numrows": "0",
"rawdatasize": "0",
"totalsize": "0",
- "transient_lastddltime": "1688421808"
+ "transient_lastddltime": "1698223431"
},
"name": "struct_test",
"tags": []
@@ -950,7 +978,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -967,7 +996,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -991,7 +1021,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1006,7 +1037,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1024,7 +1056,7 @@
"customProperties": {
"numfiles": "0",
"totalsize": "0",
- "transient_lastddltime": "1688422062"
+ "transient_lastddltime": "1698223435"
},
"name": "struct_test_view_materialized",
"tags": []
@@ -1113,7 +1145,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1130,7 +1163,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1154,7 +1188,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1169,7 +1204,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1190,7 +1226,7 @@
"numrows": "0",
"rawdatasize": "0",
"totalsize": "0",
- "transient_lastddltime": "1688421807"
+ "transient_lastddltime": "1698223431"
},
"name": "_test_table_underscore",
"tags": []
@@ -1248,7 +1284,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1265,7 +1302,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1289,7 +1327,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1304,7 +1343,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1325,7 +1365,7 @@
"numrows": "0",
"rawdatasize": "0",
"totalsize": "0",
- "transient_lastddltime": "1688422062"
+ "transient_lastddltime": "1698223435"
},
"name": "union_test",
"tags": []
@@ -1467,7 +1507,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1484,7 +1525,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1508,7 +1550,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1523,7 +1566,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1539,7 +1583,7 @@
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
- "transient_lastddltime": "1688422062",
+ "transient_lastddltime": "1698223435",
"view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"",
"is_view": "True"
},
@@ -1634,7 +1678,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1651,7 +1696,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1668,7 +1714,57 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "upstreamLineage",
+ "aspect": {
+ "json": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1632398400000,
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1692,7 +1788,8 @@
},
"systemMetadata": {
"lastObserved": 1632398400000,
- "runId": "trino-hive-test"
+ "runId": "trino-hive-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py
index 54a22d860285c..e8485106c6a81 100644
--- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py
+++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py
@@ -104,6 +104,27 @@ def test_incremental_table_lineage(tmp_path, pytestconfig):
)
+def test_incremental_table_lineage_empty_upstreams(tmp_path, pytestconfig):
+
+ urn = make_dataset_urn(platform, "dataset1")
+ aspect = make_lineage_aspect(
+ "dataset1",
+ upstreams=[],
+ )
+
+ processed_wus = auto_incremental_lineage(
+ graph=None,
+ incremental_lineage=True,
+ stream=[
+ MetadataChangeProposalWrapper(
+ entityUrn=urn, aspect=aspect, systemMetadata=system_metadata
+ ).as_workunit()
+ ],
+ )
+
+ assert [wu.metadata for wu in processed_wus] == []
+
+
@pytest.mark.parametrize(
"gms_aspect,current_aspect,output_aspect",
[
From a96a512166564cf9c40af4b83e7138dcb48c914d Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Thu, 26 Oct 2023 18:46:10 +0200
Subject: [PATCH 002/792] fix(ingest/bigquery): Fixing lineage filter query
(#9114)
---
.../ingestion/source/bigquery_v2/bigquery_config.py | 1 +
.../datahub/ingestion/source/bigquery_v2/lineage.py | 8 ++++++--
.../src/datahub/ingestion/source/bigquery_v2/usage.py | 10 ++++++----
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
index 6203192769750..f762d451849ab 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py
@@ -309,6 +309,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
"dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern instead."
)
values["dataset_pattern"] = schema_pattern
+ dataset_pattern = schema_pattern
elif (
dataset_pattern != AllowDenyPattern.allow_all()
and schema_pattern != AllowDenyPattern.allow_all()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
index aa462435b8105..e9acf5ea86044 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py
@@ -20,6 +20,7 @@
from google.cloud.datacatalog import lineage_v1
from google.cloud.logging_v2.client import Client as GCPLoggingClient
+from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -683,8 +684,11 @@ def _create_lineage_map(
self.report.num_skipped_lineage_entries_missing_data[e.project_id] += 1
continue
- if not self.config.dataset_pattern.allowed(
- destination_table.table_identifier.dataset
+ if not is_schema_allowed(
+ self.config.dataset_pattern,
+ destination_table.table_identifier.dataset,
+ destination_table.table_identifier.project_id,
+ self.config.match_fully_qualified_names,
) or not self.config.table_pattern.allowed(
destination_table.table_identifier.get_table_name()
):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
index 7fc38991e5928..65b559550ffc5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py
@@ -21,6 +21,7 @@
import humanfriendly
+from datahub.configuration.pattern_utils import is_schema_allowed
from datahub.configuration.time_window_config import (
BaseTimeWindowConfig,
get_time_bucket,
@@ -335,10 +336,11 @@ def get_time_window(self) -> Tuple[datetime, datetime]:
def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool:
return (
table_ref is not None
- and self.config.dataset_pattern.allowed(
- f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}"
- if self.config.match_fully_qualified_names
- else table_ref.table_identifier.dataset
+ and is_schema_allowed(
+ self.config.dataset_pattern,
+ table_ref.table_identifier.dataset,
+ table_ref.table_identifier.project_id,
+ self.config.match_fully_qualified_names,
)
and self.config.table_pattern.allowed(str(table_ref.table_identifier))
)
From 852267972c8efc1ceb5a0cbd71594d7ea2529d49 Mon Sep 17 00:00:00 2001
From: "nicholas.fwang"
Date: Fri, 27 Oct 2023 01:57:43 +0900
Subject: [PATCH 003/792] refactor(ingestion/mongodb): Add platform_instance to
mongodb (#8663)
Co-authored-by: Harshal Sheth
---
.../src/datahub/ingestion/source/mongodb.py | 16 +++++++++++++---
.../integration/mongodb/mongodb_mces_golden.json | 16 ++++++++--------
.../tests/integration/mongodb/test_mongodb.py | 1 +
3 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
index f02b6845e40b5..890c5c64bd5e6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
@@ -11,7 +11,11 @@
from pymongo.mongo_client import MongoClient
from datahub.configuration.common import AllowDenyPattern
-from datahub.configuration.source_common import EnvConfigMixin
+from datahub.configuration.source_common import (
+ EnvConfigMixin,
+ PlatformInstanceConfigMixin,
+)
+from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SourceCapability,
@@ -55,7 +59,7 @@
DENY_DATABASE_LIST = set(["admin", "config", "local"])
-class MongoDBConfig(EnvConfigMixin):
+class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
# See the MongoDB authentication docs for details and examples.
# https://pymongo.readthedocs.io/en/stable/examples/authentication.html
connect_uri: str = Field(
@@ -199,6 +203,7 @@ def construct_schema_pymongo(
@platform_name("MongoDB")
@config_class(MongoDBConfig)
@support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@dataclass
class MongoDBSource(Source):
@@ -320,7 +325,12 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
self.report.report_dropped(dataset_name)
continue
- dataset_urn = f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{self.config.env})"
+ dataset_urn = make_dataset_urn_with_platform_instance(
+ platform=platform,
+ name=dataset_name,
+ env=self.config.env,
+ platform_instance=self.config.platform_instance,
+ )
dataset_snapshot = DatasetSnapshot(
urn=dataset_urn,
diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
index 1f662cfe514e2..e16101b137ac9 100644
--- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
+++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
@@ -2,7 +2,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)",
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
@@ -41,7 +41,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)",
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
@@ -345,7 +345,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)",
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
@@ -3988,7 +3988,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)",
+ "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
@@ -4135,7 +4135,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4150,7 +4150,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4165,7 +4165,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4180,7 +4180,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
diff --git a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py
index 5228c21223e24..56fb471d4c9f1 100644
--- a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py
+++ b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py
@@ -25,6 +25,7 @@ def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time
"username": "mongoadmin",
"password": "examplepass",
"maxDocumentSize": 25000,
+ "platform_instance": "instance",
},
},
"sink": {
From ce6f833be444497972f17fd8bfe170f00af4bca6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mert=20Tun=C3=A7?=
Date: Fri, 27 Oct 2023 01:06:37 +0300
Subject: [PATCH 004/792] fix(kafka-setup): Don't set truststore pass for PEM
files (#8656)
Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
---
docker/kafka-setup/kafka-setup.sh | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh
index 629e9bc9484ee..b5024e49e59f1 100755
--- a/docker/kafka-setup/kafka-setup.sh
+++ b/docker/kafka-setup/kafka-setup.sh
@@ -36,7 +36,9 @@ if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SSL" ]]; then
fi
if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION ]]; then
echo "ssl.truststore.location=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH
- echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH
+ if [[ $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE != "PEM" ]]; then
+ echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH
+ fi
if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE ]]; then
echo "ssl.truststore.type=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE" >> $CONNECTION_PROPERTIES_PATH
fi
From 12f6fe0f906dd21fbc3985cfd13ceac4fc9ac8f0 Mon Sep 17 00:00:00 2001
From: Tony Ouyang
Date: Thu, 26 Oct 2023 15:07:36 -0700
Subject: [PATCH 005/792] fix(ingest): Fix roll back failure when
REST_API_AUTHORIZATION_ENABLED is set to true (#9092)
---
metadata-service/war/src/main/resources/boot/policies.json | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json
index 410596cc30cbe..18cb48bfcf1f0 100644
--- a/metadata-service/war/src/main/resources/boot/policies.json
+++ b/metadata-service/war/src/main/resources/boot/policies.json
@@ -64,7 +64,8 @@
"GET_TIMELINE_PRIVILEGE",
"PRODUCE_PLATFORM_EVENT_PRIVILEGE",
"MANAGE_DATA_PRODUCTS",
- "MANAGE_GLOBAL_OWNERSHIP_TYPES"
+ "MANAGE_GLOBAL_OWNERSHIP_TYPES",
+ "DELETE_ENTITY"
],
"displayName":"Root User - Edit and View All Resources",
"description":"Grants full edit and view privileges for all resources to root 'datahub' root user.",
@@ -263,7 +264,8 @@
"GET_ENTITY_PRIVILEGE",
"GET_TIMELINE_PRIVILEGE",
"PRODUCE_PLATFORM_EVENT_PRIVILEGE",
- "MANAGE_DATA_PRODUCTS"
+ "MANAGE_DATA_PRODUCTS",
+ "DELETE_ENTITY"
],
"displayName":"Admins - Metadata Policy",
"description":"Admins have all metadata privileges.",
From 1ac831f07aa2bdab555acf50431f6466bb291f61 Mon Sep 17 00:00:00 2001
From: Raj Tekal
Date: Thu, 26 Oct 2023 19:33:09 -0400
Subject: [PATCH 006/792] (fix): Avoid
java.util.ConcurrentModificationException (#9090)
Co-authored-by: Pedro Silva
---
.../authorization/DataHubAuthorizer.java | 105 +++++++++++-------
1 file changed, 64 insertions(+), 41 deletions(-)
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
index 4553139e3ca54..e30fb93109915 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
@@ -19,6 +19,8 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nonnull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@@ -53,6 +55,7 @@ public enum AuthorizationMode {
// Maps privilege name to the associated set of policies for fast access.
// Not concurrent data structure because writes are always against the entire thing.
private final Map<String, List<DataHubPolicyInfo>> _policyCache = new HashMap<>(); // Shared Policy Cache.
+ private final ReadWriteLock _lockPolicyCache = new ReentrantReadWriteLock();
private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1);
private final PolicyRefreshRunnable _policyRefreshRunnable;
@@ -71,7 +74,7 @@ public DataHubAuthorizer(
_systemAuthentication = Objects.requireNonNull(systemAuthentication);
_mode = Objects.requireNonNull(mode);
_policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient));
- _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache);
+ _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, _lockPolicyCache);
_refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS);
}
@@ -90,31 +93,41 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request
Optional<ResolvedEntitySpec> resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve);
- // 1. Fetch the policies relevant to the requested privilege.
- final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>());
-
- // 2. Evaluate each policy.
- for (DataHubPolicyInfo policy : policiesToEvaluate) {
- if (isRequestGranted(policy, request, resolvedResourceSpec)) {
- // Short circuit if policy has granted privileges to this actor.
- return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW,
- String.format("Granted by policy with type: %s", policy.getType()));
+ _lockPolicyCache.readLock().lock();
+ try {
+ // 1. Fetch the policies relevant to the requested privilege.
+ final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>());
+
+ // 2. Evaluate each policy.
+ for (DataHubPolicyInfo policy : policiesToEvaluate) {
+ if (isRequestGranted(policy, request, resolvedResourceSpec)) {
+ // Short circuit if policy has granted privileges to this actor.
+ return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW,
+ String.format("Granted by policy with type: %s", policy.getType()));
+ }
}
+ return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null);
+ } finally {
+ _lockPolicyCache.readLock().unlock();
}
- return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null);
}
public List getGrantedPrivileges(final String actor, final Optional resourceSpec) {
- // 1. Fetch all policies
- final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>());
+ _lockPolicyCache.readLock().lock();
+ try {
+ // 1. Fetch all policies
+ final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>());
- Urn actorUrn = UrnUtils.getUrn(actor);
- final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor));
+ Urn actorUrn = UrnUtils.getUrn(actor);
+ final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor));
- Optional<ResolvedEntitySpec> resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
+ Optional<ResolvedEntitySpec> resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
- return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec);
+ return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec);
+ } finally {
+ _lockPolicyCache.readLock().unlock();
+ }
}
/**
@@ -124,36 +137,42 @@ public List getGrantedPrivileges(final String actor, final Optional resourceSpec) {
- // Step 1: Find policies granting the privilege.
- final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>());
-
- Optional<ResolvedEntitySpec> resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
final List authorizedUsers = new ArrayList<>();
final List authorizedGroups = new ArrayList<>();
boolean allUsers = false;
boolean allGroups = false;
- // Step 2: For each policy, determine whether the resource is a match.
- for (DataHubPolicyInfo policy : policiesToEvaluate) {
- if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) {
- // Policy is not active, skip.
- continue;
- }
+ _lockPolicyCache.readLock().lock();
+ try {
+ // Step 1: Find policies granting the privilege.
+ final List<DataHubPolicyInfo> policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>());
- final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec);
+ Optional<ResolvedEntitySpec> resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
- // Step 3: For each matching policy, add actors that are authorized.
- authorizedUsers.addAll(matchingActors.getUsers());
- authorizedGroups.addAll(matchingActors.getGroups());
- if (matchingActors.allUsers()) {
- allUsers = true;
- }
- if (matchingActors.allGroups()) {
- allGroups = true;
+
+ // Step 2: For each policy, determine whether the resource is a match.
+ for (DataHubPolicyInfo policy : policiesToEvaluate) {
+ if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) {
+ // Policy is not active, skip.
+ continue;
+ }
+
+ final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec);
+
+ // Step 3: For each matching policy, add actors that are authorized.
+ authorizedUsers.addAll(matchingActors.getUsers());
+ authorizedGroups.addAll(matchingActors.getGroups());
+ if (matchingActors.allUsers()) {
+ allUsers = true;
+ }
+ if (matchingActors.allGroups()) {
+ allGroups = true;
+ }
}
+ } finally {
+ _lockPolicyCache.readLock().unlock();
}
-
// Step 4: Return all authorized users and groups.
return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, allUsers, allGroups);
}
@@ -228,6 +247,7 @@ static class PolicyRefreshRunnable implements Runnable {
private final Authentication _systemAuthentication;
private final PolicyFetcher _policyFetcher;
private final Map<String, List<DataHubPolicyInfo>> _policyCache;
+ private final ReadWriteLock _lockPolicyCache;
@Override
public void run() {
@@ -253,10 +273,13 @@ public void run() {
"Failed to retrieve policy urns! Skipping updating policy cache until next refresh. start: {}, count: {}", start, count, e);
return;
}
- synchronized (_policyCache) {
- _policyCache.clear();
- _policyCache.putAll(newCache);
- }
+ }
+ _lockPolicyCache.writeLock().lock();
+ try {
+ _policyCache.clear();
+ _policyCache.putAll(newCache);
+ } finally {
+ _lockPolicyCache.writeLock().unlock();
}
log.debug(String.format("Successfully fetched %s policies.", total));
} catch (Exception e) {
From cf617d77f383a19bd6a9bce00bb2cfbd6a226e55 Mon Sep 17 00:00:00 2001
From: David Sanchez
Date: Fri, 27 Oct 2023 07:01:30 +0200
Subject: [PATCH 007/792] Fix(ingest/bigquery): fix extracting comments from
complex types (#8950)
Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
---
.../datahub/ingestion/source/bigquery_v2/bigquery.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
index 692d8c4f81bb6..6959a48313010 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ -1050,8 +1050,14 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]:
for idx, field in enumerate(schema_fields):
# Remove all the [version=2.0].[type=struct]. tags to get the field path
if (
- re.sub(r"\[.*?\]\.", "", field.fieldPath, 0, re.MULTILINE)
- == col.field_path
+ re.sub(
+ r"\[.*?\]\.",
+ "",
+ field.fieldPath.lower(),
+ 0,
+ re.MULTILINE,
+ )
+ == col.field_path.lower()
):
field.description = col.comment
schema_fields[idx] = field
From cc7511501b051b9a9f66dbcc4dc4ab16ce2668e5 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Fri, 27 Oct 2023 19:42:53 +0900
Subject: [PATCH 008/792] docs: add versions 0.12.0 (#9125)
---
docs-website/versions.json | 1 +
1 file changed, 1 insertion(+)
diff --git a/docs-website/versions.json b/docs-website/versions.json
index a5493c26a4c65..a66607b67ddd5 100644
--- a/docs-website/versions.json
+++ b/docs-website/versions.json
@@ -1,4 +1,5 @@
[
+ "0.12.0",
"0.11.0",
"0.10.5"
]
From 07a5e4c81b9e7b46faf9dbc830c9bac9648e5161 Mon Sep 17 00:00:00 2001
From: Chris Collins
Date: Fri, 27 Oct 2023 07:52:24 -0400
Subject: [PATCH 009/792] fix(ui) Fix filtering logic for everywhere generating
OR filters (#9116)
---
.../src/app/search/useGetSearchQueryInputs.ts | 12 ++----
.../utils/__tests__/generateOrFilters.test.ts | 38 ++++++++++++++-----
.../src/app/search/utils/generateOrFilters.ts | 16 +++++---
3 files changed, 43 insertions(+), 23 deletions(-)
diff --git a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts
index 05419e5abed35..9a3af8fb8d56c 100644
--- a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts
+++ b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts
@@ -3,7 +3,7 @@ import { useLocation, useParams } from 'react-router';
import { useMemo } from 'react';
import { FacetFilterInput, EntityType } from '../../types.generated';
import { useEntityRegistry } from '../useEntityRegistry';
-import { ENTITY_FILTER_NAME, FILTER_DELIMITER, UnionType } from './utils/constants';
+import { ENTITY_FILTER_NAME, UnionType } from './utils/constants';
import { useUserContext } from '../context/useUserContext';
import useFilters from './utils/useFilters';
import { generateOrFilters } from './utils/generateOrFilters';
@@ -27,12 +27,6 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array = useFilters(params);
- const nonNestedFilters = filters.filter(
- (f) => !f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field),
- );
- const nestedFilters = filters.filter(
- (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field),
- );
const entityFilters: Array = useMemo(
() =>
filters
@@ -43,8 +37,8 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array generateOrFilters(unionType, nonNestedFilters, nestedFilters),
- [nonNestedFilters, nestedFilters, unionType],
+ () => generateOrFilters(unionType, filters, excludedFilterFields),
+ [filters, excludedFilterFields, unionType],
);
return { entityFilters, query, unionType, filters, orFilters, viewUrn, page, activeType, sortInput };
diff --git a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts
index 505c50efb289f..fd5a5691b454e 100644
--- a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts
+++ b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts
@@ -1,7 +1,7 @@
import {
DOMAINS_FILTER_NAME,
ENTITY_SUB_TYPE_FILTER_NAME,
- ENTITY_TYPE_FILTER_NAME,
+ ENTITY_FILTER_NAME,
TAGS_FILTER_NAME,
UnionType,
} from '../constants';
@@ -10,7 +10,7 @@ import { generateOrFilters } from '../generateOrFilters';
describe('generateOrFilters', () => {
it('should generate orFilters with UnionType.AND', () => {
const filters = [
- { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] },
+ { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] },
{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
];
const orFilters = generateOrFilters(UnionType.AND, filters);
@@ -24,7 +24,7 @@ describe('generateOrFilters', () => {
it('should generate orFilters with UnionType.OR', () => {
const filters = [
- { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] },
+ { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] },
{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
];
const orFilters = generateOrFilters(UnionType.OR, filters);
@@ -43,17 +43,23 @@ describe('generateOrFilters', () => {
const filters = [
{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
{ field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] },
+ { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] },
];
- const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }];
- const orFilters = generateOrFilters(UnionType.AND, filters, nestedFilters);
+ // const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }];
+ const orFilters = generateOrFilters(UnionType.AND, filters);
expect(orFilters).toMatchObject([
{
- and: [...filters, { field: '_entityType', values: ['CONTAINER'] }],
+ and: [
+ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
+ { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] },
+ { field: '_entityType', values: ['CONTAINER'] },
+ ],
},
{
and: [
- ...filters,
+ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
+ { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] },
{ field: '_entityType', values: ['DATASET'] },
{ field: 'typeNames', values: ['table'] },
],
@@ -65,9 +71,9 @@ describe('generateOrFilters', () => {
const filters = [
{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
{ field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] },
+ { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] },
];
- const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }];
- const orFilters = generateOrFilters(UnionType.OR, filters, nestedFilters);
+ const orFilters = generateOrFilters(UnionType.OR, filters);
expect(orFilters).toMatchObject([
{
@@ -87,4 +93,18 @@ describe('generateOrFilters', () => {
},
]);
});
+
+ it('should generate orFilters and exclude filters with a provided exclude field', () => {
+ const filters = [
+ { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] },
+ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] },
+ ];
+ const orFilters = generateOrFilters(UnionType.AND, filters, [ENTITY_FILTER_NAME]);
+
+ expect(orFilters).toMatchObject([
+ {
+ and: [{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }],
+ },
+ ]);
+ });
});
diff --git a/datahub-web-react/src/app/search/utils/generateOrFilters.ts b/datahub-web-react/src/app/search/utils/generateOrFilters.ts
index b665a2e0f0495..fa2939b3436f5 100644
--- a/datahub-web-react/src/app/search/utils/generateOrFilters.ts
+++ b/datahub-web-react/src/app/search/utils/generateOrFilters.ts
@@ -26,20 +26,26 @@ function generateInputWithNestedFilters(filters: FacetFilterInput[], nestedFilte
export function generateOrFilters(
unionType: UnionType,
filters: FacetFilterInput[],
- nestedFilters: FacetFilterInput[] = [],
+ excludedFilterFields: string[] = [],
): AndFilterInput[] {
- if ((filters?.length || 0) === 0 && nestedFilters.length === 0) {
+ if ((filters?.length || 0) === 0) {
return [];
}
+ const nonNestedFilters = filters.filter(
+ (f) => !f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field),
+ );
+ const nestedFilters = filters.filter(
+ (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field),
+ );
if (unionType === UnionType.OR) {
const orFiltersWithNestedFilters = generateInputWithNestedFilters([], nestedFilters);
- const orFilters = filters.map((filter) => ({
+ const orFilters = nonNestedFilters.map((filter) => ({
and: [filter],
}));
return [...orFilters, ...orFiltersWithNestedFilters];
}
- const andFiltersWithNestedFilters = generateInputWithNestedFilters(filters, nestedFilters);
+ const andFiltersWithNestedFilters = generateInputWithNestedFilters(nonNestedFilters, nestedFilters);
if (andFiltersWithNestedFilters.length) {
return andFiltersWithNestedFilters;
@@ -47,7 +53,7 @@ export function generateOrFilters(
return [
{
- and: filters,
+ and: nonNestedFilters,
},
];
}
From 379ffc8d9457bb86029383e857aaa41eae40f329 Mon Sep 17 00:00:00 2001
From: Pedro Silva
Date: Fri, 27 Oct 2023 17:17:27 +0100
Subject: [PATCH 010/792] build(release): Update files for 0.12.0 release
(#9130)
---
.../src/app/ingest/source/builder/NameSourceStep.tsx | 2 +-
gradle/versioning/versioning.gradle | 2 +-
.../tests/cypress/cypress/e2e/mutations/managed_ingestion.js | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
index f4c048bcaf0d2..3092364bb8bdd 100644
--- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
+++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
@@ -190,7 +190,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
setVersion(event.target.value)}
/>
diff --git a/gradle/versioning/versioning.gradle b/gradle/versioning/versioning.gradle
index 1fac894d165a8..39a8a3faf8011 100644
--- a/gradle/versioning/versioning.gradle
+++ b/gradle/versioning/versioning.gradle
@@ -21,7 +21,7 @@ Produces the following variables and supports token replacement
import org.apache.tools.ant.filters.ReplaceTokens
def detailedVersionString = "0.0.0-unknown-SNAPSHOT"
-def cliMajorVersion = "0.10.5" // base default cli major version
+def cliMajorVersion = "0.12.0" // base default cli major version
def snapshotVersion = false
if (project.hasProperty("releaseVersion")) {
version = releaseVersion
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js
index 24a24cc21138d..3d052695e818f 100644
--- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js
@@ -7,7 +7,7 @@ describe("run managed ingestion", () => {
it("create run managed ingestion source", () => {
let number = Math.floor(Math.random() * 100000);
let testName = `cypress test source ${number}`
- let cli_version = "0.10.5.4";
+ let cli_version = "0.12.0";
cy.login();
cy.goToIngestionPage();
cy.clickOptionWithText("Create new source");
From 5166d90433123891bc8f9555d4c6660a2b5c1451 Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Fri, 27 Oct 2023 22:49:51 +0530
Subject: [PATCH 011/792] fix(ingest/sql-server): update queries to use escaped
procedure name (#9127)
---
.../ingestion/source/sql/mssql/source.py | 4 +-
.../golden_mces_mssql_no_db_to_file.json | 774 +++++++++++-------
.../golden_mces_mssql_no_db_with_filter.json | 471 +++++++----
.../golden_mces_mssql_to_file.json | 471 +++++++----
...golden_mces_mssql_with_lower_case_urn.json | 365 ++++++---
.../integration/sql_server/setup/setup.sql | 2 +-
6 files changed, 1324 insertions(+), 763 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
index 685d4fb3074c9..710825c8ba55d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py
@@ -530,7 +530,7 @@ def _get_procedure_inputs(
def _get_procedure_code(
conn: Connection, procedure: StoredProcedure
) -> Tuple[Optional[str], Optional[str]]:
- query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.full_name}'"
+ query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.escape_full_name}'"
try:
code_data = conn.execute(query)
except ProgrammingError:
@@ -567,7 +567,7 @@ def _get_procedure_properties(
create_date as date_created,
modify_date as date_modified
FROM sys.procedures
- WHERE object_id = object_id('{procedure.full_name}')
+ WHERE object_id = object_id('{procedure.escape_full_name}')
"""
)
properties = {}
diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json
index a495d04c4e398..2fe7a76fd01ae 100644
--- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json
+++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -80,7 +100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -91,11 +112,11 @@
"aspect": {
"json": {
"customProperties": {
- "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362",
+ "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642",
"job_name": "Weekly Demo Data Backup",
"description": "No description available.",
- "date_created": "2023-03-10 16:27:54.970000",
- "date_modified": "2023-03-10 16:27:55.097000",
+ "date_created": "2023-10-27 10:11:55.540000",
+ "date_modified": "2023-10-27 10:11:55.667000",
"step_id": "1",
"step_name": "Set database to read only",
"subsystem": "TSQL",
@@ -110,7 +131,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -127,22 +149,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "container",
- "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": []
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -163,7 +171,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -178,7 +187,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -193,7 +203,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -210,7 +221,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -225,7 +237,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -245,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -266,7 +280,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -281,7 +296,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -296,7 +312,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -313,7 +330,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -328,7 +346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -348,7 +367,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -369,7 +389,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -384,7 +405,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -399,7 +421,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -416,7 +439,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -431,7 +455,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -451,7 +476,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -472,7 +498,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -487,7 +514,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -502,7 +530,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -519,7 +548,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -534,7 +564,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +585,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -575,7 +607,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -590,7 +623,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -605,7 +639,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -622,7 +657,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -637,7 +673,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -657,7 +694,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -678,7 +716,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -693,7 +732,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -708,7 +748,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -725,7 +766,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -740,7 +782,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -760,7 +803,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -781,7 +825,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -796,7 +841,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -811,7 +857,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -828,7 +875,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -843,7 +891,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -863,7 +912,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -884,7 +934,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +950,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +966,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -931,7 +984,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -946,7 +1000,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -966,7 +1021,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -987,7 +1043,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1002,7 +1059,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1017,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1034,7 +1093,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1049,7 +1109,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1069,7 +1130,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1090,7 +1152,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1105,7 +1168,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1120,7 +1184,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1137,7 +1202,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1152,7 +1218,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1172,7 +1239,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1187,7 +1255,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1259,7 +1328,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1276,7 +1346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1300,7 +1371,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1321,7 +1393,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1336,7 +1409,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1351,7 +1425,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1368,7 +1443,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1383,7 +1459,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1403,7 +1480,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1418,7 +1496,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1491,7 +1570,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1508,7 +1588,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1532,7 +1613,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1547,7 +1629,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1644,7 +1727,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1661,7 +1745,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1685,7 +1770,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1700,7 +1786,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1796,7 +1883,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1813,7 +1901,33 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
+ },
+ {
+ "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
+ "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1830,12 +1944,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInfo",
"aspect": {
@@ -1843,14 +1958,14 @@
"customProperties": {
"procedure_depends_on": "{}",
"depending_on_procedure": "{}",
- "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
+ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
"input parameters": "['@ID']",
"parameter @ID": "{'type': 'int'}",
- "date_created": "2023-03-10 16:27:54.907000",
- "date_modified": "2023-03-10 16:27:54.907000"
+ "date_created": "2023-10-27 10:11:55.460000",
+ "date_modified": "2023-10-27 10:11:55.460000"
},
"externalUrl": "",
- "name": "demodata.Foo.DBs",
+ "name": "demodata.Foo.Proc.With.SpecialChar",
"type": {
"string": "MSSQL_STORED_PROCEDURE"
}
@@ -1858,12 +1973,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInputOutput",
"aspect": {
@@ -1875,31 +1991,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
- },
- {
- "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
- "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1920,7 +2013,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1935,7 +2029,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1950,7 +2045,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1967,7 +2063,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1982,7 +2079,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2002,7 +2100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2023,7 +2122,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2038,7 +2138,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2053,7 +2154,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2070,7 +2172,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2085,7 +2188,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2105,7 +2209,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2126,7 +2231,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2141,7 +2247,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2156,7 +2263,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2173,7 +2281,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2188,7 +2297,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2208,7 +2318,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2228,7 +2339,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2243,7 +2355,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2258,7 +2371,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2275,7 +2389,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2290,7 +2405,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2311,7 +2427,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2326,7 +2443,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2341,7 +2459,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2358,7 +2477,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2373,7 +2493,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2393,7 +2514,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2414,7 +2536,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2429,7 +2552,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2444,7 +2568,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2461,7 +2586,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2476,7 +2602,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2496,7 +2623,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2517,7 +2645,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2532,7 +2661,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2547,7 +2677,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2564,7 +2695,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2579,7 +2711,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2599,7 +2732,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2620,7 +2754,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2635,7 +2770,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2650,7 +2786,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2667,7 +2804,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2682,7 +2820,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2702,7 +2841,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2723,7 +2863,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2738,7 +2879,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2753,7 +2895,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2770,7 +2913,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2785,7 +2929,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2805,7 +2950,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2826,7 +2972,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2841,7 +2988,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2856,7 +3004,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2873,7 +3022,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2888,7 +3038,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2908,7 +3059,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2929,7 +3081,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2944,7 +3097,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2959,7 +3113,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2976,7 +3131,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2991,7 +3147,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3011,7 +3168,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3032,7 +3190,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3047,7 +3206,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3062,7 +3222,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3079,7 +3240,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3094,7 +3256,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3114,7 +3277,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3135,7 +3299,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3150,7 +3315,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3165,7 +3331,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3182,7 +3349,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3197,7 +3365,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3217,7 +3386,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3238,7 +3408,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3253,7 +3424,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3268,7 +3440,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3285,7 +3458,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3300,7 +3474,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3320,7 +3495,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3335,7 +3511,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3407,7 +3584,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3424,7 +3602,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3448,7 +3627,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3469,7 +3649,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3484,7 +3665,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3499,7 +3681,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3516,7 +3699,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3531,7 +3715,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3551,7 +3736,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3566,7 +3752,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3638,7 +3825,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3655,7 +3843,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3679,7 +3868,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3694,7 +3884,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3790,7 +3981,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3807,7 +3999,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3831,7 +4024,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3852,7 +4046,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3867,7 +4062,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3882,7 +4078,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3899,7 +4096,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3914,7 +4112,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3934,7 +4133,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3955,7 +4155,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3970,7 +4171,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3985,7 +4187,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4002,7 +4205,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4017,7 +4221,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4037,7 +4242,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4058,7 +4264,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4073,7 +4280,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4088,7 +4296,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4105,7 +4314,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4120,27 +4330,34 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "removed": false
+ "path": [
+ {
+ "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59",
+ "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59"
+ }
+ ]
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4150,12 +4367,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4165,12 +4383,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -4180,27 +4399,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "status",
"aspect": {
"json": {
- "path": [
- {
- "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59",
- "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59"
- }
- ]
+ "removed": false
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json
index 8277ff8bf7e89..c1984828750eb 100644
--- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json
+++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -80,7 +100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -91,11 +112,11 @@
"aspect": {
"json": {
"customProperties": {
- "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362",
+ "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642",
"job_name": "Weekly Demo Data Backup",
"description": "No description available.",
- "date_created": "2023-03-10 16:27:54.970000",
- "date_modified": "2023-03-10 16:27:55.097000",
+ "date_created": "2023-10-27 10:11:55.540000",
+ "date_modified": "2023-10-27 10:11:55.667000",
"step_id": "1",
"step_name": "Set database to read only",
"subsystem": "TSQL",
@@ -110,7 +131,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -127,22 +149,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "container",
- "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": []
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -163,7 +171,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -178,7 +187,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -193,7 +203,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -210,7 +221,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -225,7 +237,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -245,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -266,7 +280,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -281,7 +296,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -296,7 +312,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -313,7 +330,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -328,7 +346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -348,7 +367,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -369,7 +389,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -384,7 +405,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -399,7 +421,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -416,7 +439,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -431,7 +455,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -451,7 +476,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -472,7 +498,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -487,7 +514,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -502,7 +530,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -519,7 +548,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -534,7 +564,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +585,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -575,7 +607,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -590,7 +623,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -605,7 +639,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -622,7 +657,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -637,7 +673,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -657,7 +694,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -678,7 +716,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -693,7 +732,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -708,7 +748,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -725,7 +766,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -740,7 +782,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -760,7 +803,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -781,7 +825,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -796,7 +841,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -811,7 +857,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -828,7 +875,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -843,7 +891,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -863,7 +912,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -884,7 +934,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +950,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +966,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -931,7 +984,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -946,7 +1000,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -966,7 +1021,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -987,7 +1043,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1002,7 +1059,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1017,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1034,7 +1093,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1049,7 +1109,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1069,7 +1130,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1090,7 +1152,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1105,7 +1168,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1120,7 +1184,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1137,7 +1202,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1152,7 +1218,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1172,7 +1239,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1187,7 +1255,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1259,7 +1328,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1276,7 +1346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1300,7 +1371,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1321,7 +1393,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1336,7 +1409,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1351,7 +1425,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1368,7 +1443,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1383,7 +1459,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1403,7 +1480,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1418,7 +1496,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1491,7 +1570,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1508,7 +1588,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1532,7 +1613,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1547,7 +1629,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1644,7 +1727,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1661,7 +1745,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1685,7 +1770,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1700,7 +1786,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1796,7 +1883,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1813,7 +1901,33 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
+ },
+ {
+ "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
+ "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1830,12 +1944,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInfo",
"aspect": {
@@ -1843,14 +1958,14 @@
"customProperties": {
"procedure_depends_on": "{}",
"depending_on_procedure": "{}",
- "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
+ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
"input parameters": "['@ID']",
"parameter @ID": "{'type': 'int'}",
- "date_created": "2023-03-10 16:27:54.907000",
- "date_modified": "2023-03-10 16:27:54.907000"
+ "date_created": "2023-10-27 10:11:55.460000",
+ "date_modified": "2023-10-27 10:11:55.460000"
},
"externalUrl": "",
- "name": "demodata.Foo.DBs",
+ "name": "demodata.Foo.Proc.With.SpecialChar",
"type": {
"string": "MSSQL_STORED_PROCEDURE"
}
@@ -1858,12 +1973,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInputOutput",
"aspect": {
@@ -1875,31 +1991,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
- },
- {
- "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
- "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1920,7 +2013,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1935,7 +2029,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1950,7 +2045,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1967,7 +2063,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1982,7 +2079,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2002,7 +2100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2023,7 +2122,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2038,7 +2138,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2053,7 +2154,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2070,7 +2172,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2085,7 +2188,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2105,7 +2209,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2126,7 +2231,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2141,7 +2247,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2156,7 +2263,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2173,7 +2281,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2188,27 +2297,34 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "removed": false
+ "path": [
+ {
+ "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
+ }
+ ]
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2218,12 +2334,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2233,12 +2350,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2248,27 +2366,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "status",
"aspect": {
"json": {
- "path": [
- {
- "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
- }
- ]
+ "removed": false
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json
index f3714bba6364d..804a8d74d0d51 100644
--- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json
+++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -80,7 +100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -91,11 +112,11 @@
"aspect": {
"json": {
"customProperties": {
- "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362",
+ "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642",
"job_name": "Weekly Demo Data Backup",
"description": "No description available.",
- "date_created": "2023-03-10 16:27:54.970000",
- "date_modified": "2023-03-10 16:27:55.097000",
+ "date_created": "2023-10-27 10:11:55.540000",
+ "date_modified": "2023-10-27 10:11:55.667000",
"step_id": "1",
"step_name": "Set database to read only",
"subsystem": "TSQL",
@@ -110,7 +131,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -127,22 +149,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "container",
- "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": []
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -163,7 +171,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -178,7 +187,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -193,7 +203,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -210,7 +221,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -225,7 +237,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -245,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -266,7 +280,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -281,7 +296,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -296,7 +312,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -313,7 +330,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -328,7 +346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -348,7 +367,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -369,7 +389,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -384,7 +405,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -399,7 +421,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -416,7 +439,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -431,7 +455,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -451,7 +476,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -472,7 +498,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -487,7 +514,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -502,7 +530,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -519,7 +548,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -534,7 +564,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +585,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -575,7 +607,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -590,7 +623,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -605,7 +639,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -622,7 +657,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -637,7 +673,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -657,7 +694,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -678,7 +716,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -693,7 +732,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -708,7 +748,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -725,7 +766,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -740,7 +782,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -760,7 +803,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -781,7 +825,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -796,7 +841,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -811,7 +857,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -828,7 +875,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -843,7 +891,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -863,7 +912,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -884,7 +934,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +950,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +966,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -931,7 +984,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -946,7 +1000,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -966,7 +1021,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -987,7 +1043,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1002,7 +1059,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1017,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1034,7 +1093,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1049,7 +1109,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1069,7 +1130,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1090,7 +1152,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1105,7 +1168,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1120,7 +1184,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1137,7 +1202,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1152,7 +1218,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1172,7 +1239,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1187,7 +1255,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1259,7 +1328,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1276,7 +1346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1300,7 +1371,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1321,7 +1393,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1336,7 +1409,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1351,7 +1425,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1368,7 +1443,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1383,7 +1459,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1403,7 +1480,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1418,7 +1496,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1491,7 +1570,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1508,7 +1588,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1532,7 +1613,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1547,7 +1629,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1644,7 +1727,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1661,7 +1745,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1685,7 +1770,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1700,7 +1786,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1796,7 +1883,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1813,7 +1901,33 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
+ },
+ {
+ "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
+ "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1830,12 +1944,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInfo",
"aspect": {
@@ -1843,14 +1958,14 @@
"customProperties": {
"procedure_depends_on": "{}",
"depending_on_procedure": "{}",
- "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
+ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
"input parameters": "['@ID']",
"parameter @ID": "{'type': 'int'}",
- "date_created": "2023-03-10 16:27:54.907000",
- "date_modified": "2023-03-10 16:27:54.907000"
+ "date_created": "2023-10-27 10:11:55.460000",
+ "date_modified": "2023-10-27 10:11:55.460000"
},
"externalUrl": "",
- "name": "demodata.Foo.DBs",
+ "name": "demodata.Foo.Proc.With.SpecialChar",
"type": {
"string": "MSSQL_STORED_PROCEDURE"
}
@@ -1858,12 +1973,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInputOutput",
"aspect": {
@@ -1875,31 +1991,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
- },
- {
- "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671",
- "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1920,7 +2013,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1935,7 +2029,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1950,7 +2045,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1967,7 +2063,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1982,7 +2079,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2002,7 +2100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2023,7 +2122,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2038,7 +2138,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2053,7 +2154,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2070,7 +2172,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2085,7 +2188,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2105,7 +2209,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2126,7 +2231,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2141,7 +2247,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2156,7 +2263,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2173,7 +2281,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2188,27 +2297,34 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "removed": false
+ "path": [
+ {
+ "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
+ "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
+ }
+ ]
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataFlow",
- "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2218,12 +2334,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2233,12 +2350,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2248,27 +2366,24 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f",
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "status",
"aspect": {
"json": {
- "path": [
- {
- "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5",
- "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5"
- }
- ]
+ "removed": false
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json
index d25d23daae2ea..9d1b288057a16 100644
--- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json
+++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json
@@ -16,7 +16,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -31,7 +32,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -46,7 +48,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -63,7 +66,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -78,7 +82,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -95,7 +100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -106,11 +112,11 @@
"aspect": {
"json": {
"customProperties": {
- "job_id": "b6a0c1e2-f90a-4c86-a226-bf7ca59ad79f",
+ "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642",
"job_name": "Weekly Demo Data Backup",
"description": "No description available.",
- "date_created": "2023-08-06 21:01:05.157000",
- "date_modified": "2023-08-06 21:01:05.283000",
+ "date_created": "2023-10-27 10:11:55.540000",
+ "date_modified": "2023-10-27 10:11:55.667000",
"step_id": "1",
"step_name": "Set database to read only",
"subsystem": "TSQL",
@@ -125,7 +131,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -142,7 +149,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -163,7 +171,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -178,7 +187,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -193,7 +203,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -210,7 +221,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -225,7 +237,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -245,7 +258,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -266,7 +280,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -281,7 +296,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -296,7 +312,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -313,7 +330,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -328,7 +346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -348,7 +367,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -369,7 +389,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -384,7 +405,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -399,7 +421,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -416,7 +439,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -431,7 +455,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -451,7 +476,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -472,7 +498,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -487,7 +514,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -502,7 +530,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -519,7 +548,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -534,7 +564,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +585,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -575,7 +607,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -590,7 +623,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -605,7 +639,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -622,7 +657,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -637,7 +673,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -657,7 +694,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -678,7 +716,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -693,7 +732,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -708,7 +748,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -725,7 +766,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -740,7 +782,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -760,7 +803,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -781,7 +825,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -796,7 +841,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -811,7 +857,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -828,7 +875,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -843,7 +891,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -863,7 +912,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -884,7 +934,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -899,7 +950,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -914,7 +966,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -931,7 +984,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -946,7 +1000,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -966,7 +1021,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -987,7 +1043,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1002,7 +1059,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1017,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1034,7 +1093,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1049,7 +1109,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1069,7 +1130,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1090,7 +1152,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1105,7 +1168,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1120,7 +1184,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1137,7 +1202,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1152,7 +1218,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1172,7 +1239,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1187,7 +1255,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1259,7 +1328,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1276,7 +1346,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1300,7 +1371,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1321,7 +1393,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1336,7 +1409,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1351,7 +1425,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1368,7 +1443,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1383,7 +1459,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1403,7 +1480,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1418,7 +1496,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1491,7 +1570,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1508,7 +1588,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1532,7 +1613,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1547,7 +1629,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1644,7 +1727,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1661,7 +1745,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1685,7 +1770,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1700,7 +1786,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1796,7 +1883,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1813,7 +1901,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1837,7 +1926,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1854,12 +1944,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInfo",
"aspect": {
@@ -1867,14 +1958,14 @@
"customProperties": {
"procedure_depends_on": "{}",
"depending_on_procedure": "{}",
- "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
+ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n",
"input parameters": "['@ID']",
"parameter @ID": "{'type': 'int'}",
- "date_created": "2023-08-06 21:01:05.093000",
- "date_modified": "2023-08-06 21:01:05.093000"
+ "date_created": "2023-10-27 10:11:55.460000",
+ "date_modified": "2023-10-27 10:11:55.460000"
},
"externalUrl": "",
- "name": "demodata.Foo.DBs",
+ "name": "demodata.Foo.Proc.With.SpecialChar",
"type": {
"string": "MSSQL_STORED_PROCEDURE"
}
@@ -1882,12 +1973,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "dataJobInputOutput",
"aspect": {
@@ -1899,7 +1991,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1920,7 +2013,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1935,7 +2029,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1950,7 +2045,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1967,7 +2063,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1982,7 +2079,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2002,7 +2100,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2023,7 +2122,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2038,7 +2138,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2053,7 +2154,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2070,7 +2172,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2085,7 +2188,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2105,7 +2209,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2126,7 +2231,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2141,7 +2247,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2156,7 +2263,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2173,7 +2281,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2188,7 +2297,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2208,7 +2318,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2223,7 +2334,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2238,7 +2350,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2253,12 +2366,13 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataJob",
- "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2268,7 +2382,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mssql-test"
+ "runId": "mssql-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql
index c1347a7c8caca..a17d52f9a39b1 100644
--- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql
+++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql
@@ -45,7 +45,7 @@ CREATE TABLE Foo.SalesReason
)
;
GO
-CREATE PROCEDURE Foo.DBs @ID INT
+CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT
AS
SELECT @ID AS ThatDB;
GO
From 8b1d2094aa768d1f795a2d240de888e65c26f6fc Mon Sep 17 00:00:00 2001
From: richenc <125420929+richenc@users.noreply.github.com>
Date: Fri, 27 Oct 2023 10:36:47 -0700
Subject: [PATCH 012/792] feat(airflow): retry callback, support
ExternalTaskSensor subclasses (#8514)
Co-authored-by: Richie Chen
Co-authored-by: Harshal Sheth
---
.../client/airflow_generator.py | 7 +++-
.../datahub_plugin_v22.py | 36 ++++++++++++++++++-
.../integration/goldens/v1_basic_iolets.json | 7 +++-
.../integration/goldens/v1_simple_dag.json | 14 ++++++--
.../integration/goldens/v2_basic_iolets.json | 7 +++-
.../v2_basic_iolets_no_dag_listener.json | 7 +++-
.../integration/goldens/v2_simple_dag.json | 12 +++++--
.../v2_simple_dag_no_dag_listener.json | 14 ++++++--
.../goldens/v2_snowflake_operator.json | 7 +++-
.../goldens/v2_sqlite_operator.json | 27 +++++++++++---
.../v2_sqlite_operator_no_dag_listener.json | 35 +++++++++++++++---
11 files changed, 151 insertions(+), 22 deletions(-)
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
index 16585f70e820b..e1d53be7bae6b 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py
@@ -98,7 +98,7 @@ def _get_dependencies(
# It is possible to tie an external sensor to DAG if external_task_id is omitted but currently we can't tie
# jobflow to anothet jobflow.
external_task_upstreams = []
- if task.task_type == "ExternalTaskSensor":
+ if isinstance(task, ExternalTaskSensor):
task = cast(ExternalTaskSensor, task)
if hasattr(task, "external_task_id") and task.external_task_id is not None:
external_task_upstreams = [
@@ -155,6 +155,8 @@ def generate_dataflow(
"_concurrency",
# "_default_view",
"catchup",
+ "description",
+ "doc_md",
"fileloc",
"is_paused_upon_creation",
"start_date",
@@ -431,6 +433,9 @@ def run_datajob(
job_property_bag["operator"] = str(ti.operator)
job_property_bag["priority_weight"] = str(ti.priority_weight)
job_property_bag["log_url"] = ti.log_url
+ job_property_bag["orchestrator"] = "airflow"
+ job_property_bag["dag_id"] = str(dag.dag_id)
+ job_property_bag["task_id"] = str(ti.task_id)
dpi.properties.update(job_property_bag)
dpi.url = ti.log_url
diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
index 046fbb5efaa03..f9a2119f51e32 100644
--- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
+++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py
@@ -23,6 +23,7 @@
TASK_ON_FAILURE_CALLBACK = "on_failure_callback"
TASK_ON_SUCCESS_CALLBACK = "on_success_callback"
+TASK_ON_RETRY_CALLBACK = "on_retry_callback"
def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]:
@@ -259,6 +260,28 @@ def custom_on_success_callback(context):
return custom_on_success_callback
+def _wrap_on_retry_callback(on_retry_callback):
+ def custom_on_retry_callback(context):
+ config = get_lineage_config()
+ if config.enabled:
+ context["_datahub_config"] = config
+ try:
+ datahub_task_status_callback(
+ context, status=InstanceRunResult.UP_FOR_RETRY
+ )
+ except Exception as e:
+ if not config.graceful_exceptions:
+ raise e
+ else:
+ print(f"Exception: {traceback.format_exc()}")
+
+ # Call original policy
+ if on_retry_callback:
+ on_retry_callback(context)
+
+ return custom_on_retry_callback
+
+
def task_policy(task: Union[BaseOperator, MappedOperator]) -> None:
task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}")
# task.add_inlets(["auto"])
@@ -274,7 +297,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None:
on_success_callback_prop: property = getattr(
MappedOperator, TASK_ON_SUCCESS_CALLBACK
)
- if not on_failure_callback_prop.fset or not on_success_callback_prop.fset:
+ on_retry_callback_prop: property = getattr(
+ MappedOperator, TASK_ON_RETRY_CALLBACK
+ )
+ if (
+ not on_failure_callback_prop.fset
+ or not on_success_callback_prop.fset
+ or not on_retry_callback_prop.fset
+ ):
task.log.debug(
"Using MappedOperator's partial_kwargs instead of callback properties"
)
@@ -284,10 +314,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None:
task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback(
task.on_success_callback
)
+ task.partial_kwargs[TASK_ON_RETRY_CALLBACK] = _wrap_on_retry_callback(
+ task.on_retry_callback
+ )
return
task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback) # type: ignore
task.on_success_callback = _wrap_on_success_callback(task.on_success_callback) # type: ignore
+ task.on_retry_callback = _wrap_on_retry_callback(task.on_retry_callback) # type: ignore
# task.pre_execute = _wrap_pre_execution(task.pre_execute)
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
index 26aa2afaa831a..a4c17c73e9c7e 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -373,7 +375,10 @@
"state": "success",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets",
+ "orchestrator": "airflow",
+ "dag_id": "basic_iolets",
+ "task_id": "run_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets",
"name": "basic_iolets_run_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
index b2e3a1fe47da7..a0a95716a0993 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "'A simple DAG that runs a few fake data tasks.'",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -302,7 +304,10 @@
"state": "success",
"operator": "BashOperator",
"priority_weight": "2",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "task_1"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag",
"name": "simple_dag_task_1_manual_run_test",
@@ -433,6 +438,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "'A simple DAG that runs a few fake data tasks.'",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -654,7 +661,10 @@
"state": "success",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "run_another_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag",
"name": "simple_dag_run_another_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
index 2e733c2ad40a9..1974f1f085df0 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -224,7 +226,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "basic_iolets",
+ "task_id": "run_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
"name": "basic_iolets_run_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
index 44b288efda954..d02951bc9e82d 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -224,7 +226,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "basic_iolets",
+ "task_id": "run_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1",
"name": "basic_iolets_run_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
index 454c509279e11..9acc47ec1321e 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "'A simple DAG that runs a few fake data tasks.'",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -189,7 +191,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "2",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "task_1"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
"name": "simple_dag_task_1_manual_run_test",
@@ -523,7 +528,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "run_another_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
"name": "simple_dag_run_another_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
index 73b5765e96b7d..03299c483f57f 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "'A simple DAG that runs a few fake data tasks.'",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -189,7 +191,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "2",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "task_1"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
"name": "simple_dag_task_1_manual_run_test",
@@ -435,6 +440,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "'A simple DAG that runs a few fake data tasks.'",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -579,7 +586,10 @@
"state": "running",
"operator": "BashOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "simple_dag",
+ "task_id": "run_another_data_task"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
"name": "simple_dag_run_another_data_task_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
index affc395d421da..11a0b17b45b95 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -234,7 +236,10 @@
"state": "running",
"operator": "SnowflakeOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "snowflake_operator",
+ "task_id": "transform_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1",
"name": "snowflake_operator_transform_cost_table_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
index 81d0a71b651d9..19e4aac9fb95e 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -201,7 +203,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "5",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "create_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_create_cost_table_manual_run_test",
@@ -562,7 +567,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "4",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "populate_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_populate_cost_table_manual_run_test",
@@ -922,7 +930,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "3",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "transform_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_transform_cost_table_manual_run_test",
@@ -1364,7 +1375,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "cleanup_costs"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_cleanup_costs_manual_run_test",
@@ -1658,7 +1672,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "cleanup_processed_costs"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_cleanup_processed_costs_manual_run_test",
diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
index 96a0f02ccec17..b67464b385335 100644
--- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
+++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json
@@ -9,6 +9,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -201,7 +203,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "5",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "create_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_create_cost_table_manual_run_test",
@@ -460,6 +465,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -617,7 +624,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "4",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "populate_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_populate_cost_table_manual_run_test",
@@ -805,6 +815,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -1032,7 +1044,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "3",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "transform_cost_table"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_transform_cost_table_manual_run_test",
@@ -1370,6 +1385,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -1529,7 +1546,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "cleanup_costs"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_cleanup_costs_manual_run_test",
@@ -1719,6 +1739,8 @@
"customProperties": {
"_access_control": "None",
"catchup": "False",
+ "description": "None",
+ "doc_md": "None",
"fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'",
"is_paused_upon_creation": "None",
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
@@ -1878,7 +1900,10 @@
"state": "running",
"operator": "SqliteOperator",
"priority_weight": "1",
- "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1"
+ "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
+ "orchestrator": "airflow",
+ "dag_id": "sqlite_operator",
+ "task_id": "cleanup_processed_costs"
},
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1",
"name": "sqlite_operator_cleanup_processed_costs_manual_run_test",
From 649f6d031789252fb9ac97d932fd71396f4875f2 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Sat, 28 Oct 2023 04:02:43 +0900
Subject: [PATCH 013/792] docs: fix saasonly flags for some pages (#9124)
---
docs-website/sidebars.js | 29 +++++++++++++++++++-----
docs/managed-datahub/chrome-extension.md | 2 --
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index 31d69aec46d8b..39eaea57444ed 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -442,11 +442,29 @@ module.exports = {
},
"docs/act-on-metadata/impact-analysis",
{
- Observability: [
- "docs/managed-datahub/observe/freshness-assertions",
- "docs/managed-datahub/observe/volume-assertions",
- "docs/managed-datahub/observe/custom-sql-assertions",
- "docs/managed-datahub/observe/column-assertions",
+ label: "Observability",
+ type: "category",
+ items: [
+ {
+ type: "doc",
+ id: "docs/managed-datahub/observe/freshness-assertions",
+ className: "saasOnly",
+ },
+ {
+ type: "doc",
+ id: "docs/managed-datahub/observe/volume-assertions",
+ className: "saasOnly",
+ },
+ {
+ type: "doc",
+ id: "docs/managed-datahub/observe/custom-sql-assertions",
+ className: "saasOnly",
+ },
+ {
+ type: "doc",
+ id: "docs/managed-datahub/observe/column-assertions",
+ className: "saasOnly",
+ },
],
},
{
@@ -606,7 +624,6 @@ module.exports = {
{
type: "doc",
id: "docs/managed-datahub/chrome-extension",
- className: "saasOnly",
},
{
"Managed DataHub Release History": [
diff --git a/docs/managed-datahub/chrome-extension.md b/docs/managed-datahub/chrome-extension.md
index 0aa0860d03b67..a4560bc8cc09b 100644
--- a/docs/managed-datahub/chrome-extension.md
+++ b/docs/managed-datahub/chrome-extension.md
@@ -1,10 +1,8 @@
---
description: Learn how to upload and use the Acryl DataHub Chrome extension (beta) locally before it's available on the Chrome store.
---
-import FeatureAvailability from '@site/src/components/FeatureAvailability';
# Acryl DataHub Chrome Extension
-
## Installing the Extension
From e02b9096bd68c14944e640dbd3a235651ecebbaf Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Sat, 28 Oct 2023 00:33:43 +0530
Subject: [PATCH 014/792] fix(ingest/snowflake): missing view downstream cll if
platform instance is set (#8966)
---
.../source/snowflake/snowflake_lineage_v2.py | 75 +-
.../tests/integration/snowflake/common.py | 2 +-
.../snowflake/snowflake_golden.json | 706 ++++--
.../snowflake_privatelink_golden.json | 2075 ++++++++++++-----
.../integration/snowflake/test_snowflake.py | 5 +-
5 files changed, 2055 insertions(+), 808 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
index 9649054dbe6cb..4219533dc217c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py
@@ -20,12 +20,12 @@
import datahub.emitter.mce_builder as builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
from datahub.ingestion.source.snowflake.constants import (
LINEAGE_PERMISSION_ERROR,
SnowflakeEdition,
- SnowflakeObjectDomain,
)
from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config
from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery
@@ -53,7 +53,6 @@
sqlglot_lineage,
)
from datahub.utilities.time import ts_millis_to_datetime
-from datahub.utilities.urns.dataset_urn import DatasetUrn
logger: logging.Logger = logging.getLogger(__name__)
@@ -195,20 +194,6 @@ def get_table_upstream_workunits(
f"Upstream lineage detected for {self.report.num_tables_with_upstreams} tables.",
)
- def _gen_workunit_from_sql_parsing_result(
- self,
- dataset_identifier: str,
- result: SqlParsingResult,
- ) -> Iterable[MetadataWorkUnit]:
- upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result(
- self.dataset_urn_builder(dataset_identifier), result
- )
- if upstreams:
- self.report.num_views_with_upstreams += 1
- yield self._create_upstream_lineage_workunit(
- dataset_identifier, upstreams, fine_upstreams
- )
-
def _gen_workunits_from_query_result(
self,
discovered_assets: Collection[str],
@@ -242,18 +227,31 @@ def get_view_upstream_workunits(
schema_resolver: SchemaResolver,
view_definitions: MutableMapping[str, str],
) -> Iterable[MetadataWorkUnit]:
- views_processed = set()
+ views_failed_parsing = set()
if self.config.include_view_column_lineage:
with PerfTimer() as timer:
+ builder = SqlParsingBuilder(
+ generate_lineage=True,
+ generate_usage_statistics=False,
+ generate_operations=False,
+ )
for view_identifier, view_definition in view_definitions.items():
result = self._run_sql_parser(
view_identifier, view_definition, schema_resolver
)
- if result:
- views_processed.add(view_identifier)
- yield from self._gen_workunit_from_sql_parsing_result(
- view_identifier, result
+ if result and result.out_tables:
+ self.report.num_views_with_upstreams += 1
+ # This does not yield any workunits but we use
+ # yield here to execute this method
+ yield from builder.process_sql_parsing_result(
+ result=result,
+ query=view_definition,
+ is_view_ddl=True,
)
+ else:
+ views_failed_parsing.add(view_identifier)
+
+ yield from builder.gen_workunits()
self.report.view_lineage_parse_secs = timer.elapsed_seconds()
with PerfTimer() as timer:
@@ -261,7 +259,7 @@ def get_view_upstream_workunits(
if results:
yield from self._gen_workunits_from_query_result(
- set(discovered_views) - views_processed,
+ views_failed_parsing,
results,
upstream_for_view=True,
)
@@ -349,39 +347,6 @@ def get_upstreams_from_query_result_row(
return upstreams, fine_upstreams
- def get_upstreams_from_sql_parsing_result(
- self, downstream_table_urn: str, result: SqlParsingResult
- ) -> Tuple[List[UpstreamClass], List[FineGrainedLineage]]:
- # Note: This ignores the out_tables section of the sql parsing result.
- upstreams = [
- UpstreamClass(dataset=upstream_table_urn, type=DatasetLineageTypeClass.VIEW)
- for upstream_table_urn in set(result.in_tables)
- ]
-
- # Maps downstream_col -> [upstream_col]
- fine_lineage: Dict[str, Set[SnowflakeColumnId]] = defaultdict(set)
- for column_lineage in result.column_lineage or []:
- out_column = column_lineage.downstream.column
- for upstream_column_info in column_lineage.upstreams:
- upstream_table_name = DatasetUrn.create_from_string(
- upstream_column_info.table
- ).get_dataset_name()
- fine_lineage[out_column].add(
- SnowflakeColumnId(
- columnName=upstream_column_info.column,
- objectName=upstream_table_name,
- objectDomain=SnowflakeObjectDomain.VIEW.value,
- )
- )
- fine_upstreams = [
- self.build_finegrained_lineage(
- downstream_table_urn, downstream_col, upstream_cols
- )
- for downstream_col, upstream_cols in fine_lineage.items()
- ]
-
- return upstreams, list(filter(None, fine_upstreams))
-
def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None:
with PerfTimer() as timer:
self.report.num_external_table_edges_scanned = 0
diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py
index 81e307a78ae9e..ff448eca01071 100644
--- a/metadata-ingestion/tests/integration/snowflake/common.py
+++ b/metadata-ingestion/tests/integration/snowflake/common.py
@@ -94,7 +94,7 @@ def default_query_results( # noqa: C901
"name": "VIEW_{}".format(view_idx),
"created_on": datetime(2021, 6, 8, 0, 0, 0, 0),
"comment": "Comment for View",
- "text": None,
+ "text": f"create view view_{view_idx} as select * from table_{view_idx}",
}
for view_idx in range(1, num_views + 1)
]
diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
index a424b258e68ff..c7273fee5a2e5 100644
--- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
+++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json
@@ -24,7 +24,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -39,7 +40,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -54,7 +56,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -71,7 +74,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -90,7 +94,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -105,7 +110,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -121,7 +127,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -150,7 +157,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -165,7 +173,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -180,7 +189,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -197,7 +207,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -216,7 +227,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -231,7 +243,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -251,7 +264,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -266,7 +280,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -462,7 +477,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -488,7 +504,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -503,7 +520,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -520,7 +538,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -544,7 +563,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -559,7 +579,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -755,7 +776,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -781,7 +803,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -796,7 +819,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -813,7 +837,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -837,7 +862,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -852,7 +878,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1048,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1074,7 +1102,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1089,7 +1118,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1106,7 +1136,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1130,7 +1161,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1145,7 +1177,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1341,7 +1374,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1367,7 +1401,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1382,7 +1417,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1399,7 +1435,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1423,7 +1460,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1438,7 +1476,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1634,7 +1673,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1660,7 +1700,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1675,7 +1716,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1692,7 +1734,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1716,7 +1759,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1731,7 +1775,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1927,7 +1972,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1953,7 +1999,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1968,7 +2015,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1985,7 +2033,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2009,7 +2058,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2024,7 +2074,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2220,7 +2271,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2246,7 +2298,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2261,7 +2314,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2278,7 +2332,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2302,7 +2357,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2317,7 +2373,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2513,7 +2570,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2539,7 +2597,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2554,7 +2613,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2571,7 +2631,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2595,7 +2656,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2610,7 +2672,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2806,7 +2869,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2832,7 +2896,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2847,7 +2912,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2864,7 +2930,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2888,7 +2955,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2903,7 +2971,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3099,7 +3168,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3125,7 +3195,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3140,7 +3211,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3157,7 +3229,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3181,7 +3254,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3197,7 +3271,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3212,7 +3287,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3382,7 +3458,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3408,7 +3485,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3423,7 +3501,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3440,7 +3519,26 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "create view view_1 as select * from table_1",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_10_06-17_59_03",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3464,7 +3562,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3480,7 +3579,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3496,7 +3596,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3512,7 +3613,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3527,7 +3629,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3690,7 +3793,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3716,7 +3820,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "snowflake-2023_08_04-09_52_28"
+ "runId": "snowflake-2023_08_04-09_52_28",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3731,7 +3836,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3748,7 +3854,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3773,7 +3880,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3797,7 +3905,26 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "create view view_2 as select * from table_2",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_10_06-17_59_03",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3819,7 +3946,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3841,7 +3969,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3863,7 +3992,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3885,7 +4015,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3907,7 +4038,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3929,7 +4061,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3951,7 +4084,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3973,7 +4107,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -3995,7 +4130,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4017,7 +4153,145 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "upstreamLineage",
+ "aspect": {
+ "json": {
+ "upstreams": [
+ {
+ "auditStamp": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_10_06-17_59_03",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4034,14 +4308,127 @@
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)"
+ ],
+ "confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "lastObserved": 1615443388097,
+ "runId": "snowflake-2023_10_06-17_59_03",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4204,7 +4591,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4340,7 +4728,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4476,7 +4865,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4612,7 +5002,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4748,7 +5139,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4884,7 +5276,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5020,7 +5413,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5156,7 +5550,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5292,7 +5687,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5428,7 +5824,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5456,7 +5853,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5484,7 +5882,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5512,7 +5911,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5540,7 +5940,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5568,7 +5969,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5596,7 +5998,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5624,7 +6027,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5652,7 +6056,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5680,7 +6085,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5708,7 +6114,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5736,7 +6143,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5764,7 +6172,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5786,7 +6195,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5808,7 +6218,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5830,7 +6241,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5852,7 +6264,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5874,7 +6287,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5896,7 +6310,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5918,7 +6333,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5940,7 +6356,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5962,7 +6379,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5984,7 +6402,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -5999,7 +6418,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -6014,7 +6434,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -6029,7 +6450,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -6044,7 +6466,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -6059,7 +6482,8 @@
},
"systemMetadata": {
"lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00"
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json
index 5057dacd5b0c8..5e55860483d24 100644
--- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json
+++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json
@@ -1,13 +1,14 @@
[
{
"entityType": "container",
- "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
+ "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
"changeType": "UPSERT",
"aspectName": "containerProperties",
"aspect": {
"json": {
"customProperties": {
"platform": "snowflake",
+ "instance": "instance1",
"env": "PROD",
"database": "test_db"
},
@@ -29,13 +30,14 @@
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "removed": false
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -46,12 +48,12 @@
},
{
"entityType": "container",
- "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
+ "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
"changeType": "UPSERT",
- "aspectName": "dataPlatformInstance",
+ "aspectName": "status",
"aspect": {
"json": {
- "platform": "urn:li:dataPlatform:snowflake"
+ "removed": false
}
},
"systemMetadata": {
@@ -61,15 +63,13 @@
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "status",
"aspect": {
"json": {
- "typeNames": [
- "Database"
- ]
+ "removed": false
}
},
"systemMetadata": {
@@ -80,12 +80,14 @@
},
{
"entityType": "container",
- "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
+ "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "path": []
+ "typeNames": [
+ "Database"
+ ]
}
},
"systemMetadata": {
@@ -96,26 +98,17 @@
},
{
"entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
+ "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
"changeType": "UPSERT",
- "aspectName": "containerProperties",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "customProperties": {
- "platform": "snowflake",
- "env": "PROD",
- "database": "test_db",
- "schema": "test_schema"
- },
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/",
- "name": "TEST_SCHEMA",
- "description": "comment for TEST_DB.TEST_SCHEMA",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- }
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ ]
}
},
"systemMetadata": {
@@ -126,12 +119,13 @@
},
{
"entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
+ "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "removed": false
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -141,13 +135,24 @@
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
- "aspectName": "dataPlatformInstance",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "platform": "urn:li:dataPlatform:snowflake"
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/",
+ "name": "TABLE_3",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
}
},
"systemMetadata": {
@@ -157,14 +162,14 @@
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
- "Schema"
+ "Table"
]
}
},
@@ -175,32 +180,24 @@
}
},
{
- "entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "container",
- "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -212,13 +209,28 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "containerProperties",
"aspect": {
"json": {
- "removed": false
+ "customProperties": {
+ "platform": "snowflake",
+ "instance": "instance1",
+ "env": "PROD",
+ "database": "test_db",
+ "schema": "test_schema"
+ },
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/",
+ "name": "TEST_SCHEMA",
+ "description": "comment for TEST_DB.TEST_SCHEMA",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ }
}
},
"systemMetadata": {
@@ -229,12 +241,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_1",
+ "schemaName": "test_db.test_schema.table_3",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -392,24 +404,13 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "status",
"aspect": {
"json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/",
- "name": "TABLE_1",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
+ "removed": false
}
},
"systemMetadata": {
@@ -420,12 +421,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
},
"systemMetadata": {
@@ -435,14 +436,14 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
"json": {
"typeNames": [
- "Table"
+ "Schema"
]
}
},
@@ -453,20 +454,53 @@
}
},
{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
},
{
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
}
]
}
@@ -479,7 +513,85 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "create view view_1 as select * from table_1",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/",
+ "name": "VIEW_1",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1",
+ "description": "Comment for View",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -495,12 +607,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_2",
+ "schemaName": "test_db.test_schema.table_1",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -659,23 +771,14 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/",
- "name": "TABLE_2",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
+ "typeNames": [
+ "View"
+ ]
}
},
"systemMetadata": {
@@ -686,12 +789,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
},
"systemMetadata": {
@@ -702,39 +805,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "typeNames": [
- "Table"
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
- {
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- ]
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/",
+ "name": "TABLE_1",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
}
},
"systemMetadata": {
@@ -745,12 +832,25 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "removed": false
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ ]
}
},
"systemMetadata": {
@@ -761,12 +861,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_3",
+ "schemaName": "test_db.test_schema.view_1",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -925,23 +1025,43 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/",
- "name": "TABLE_3",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ ]
}
},
"systemMetadata": {
@@ -952,12 +1072,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
},
"systemMetadata": {
@@ -968,14 +1088,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "typeNames": [
- "Table"
- ]
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -986,19 +1105,131 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "upstreamLineage",
"aspect": {
"json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
+ "upstreams": [
{
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "auditStamp": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_1)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_1)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_10)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_10)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_2)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_2)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_3)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_3)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_4)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_4)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_5)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_5)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_6)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_6)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_7)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_7)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_8)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_8)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_9)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_9)"
+ ],
+ "confidenceScore": 1.0
}
]
}
@@ -1011,7 +1242,40 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -1027,12 +1291,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_4",
+ "schemaName": "test_db.test_schema.table_2",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -1191,15 +1455,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/",
- "name": "TABLE_4",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4",
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/",
+ "name": "TABLE_10",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10",
"description": "Comment for Table",
"created": {
"time": 1623110400000
@@ -1218,66 +1482,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
- "changeType": "UPSERT",
- "aspectName": "subTypes",
- "aspect": {
- "json": {
- "typeNames": [
- "Table"
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
- {
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -1293,7 +1498,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
@@ -1457,7 +1662,39 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
@@ -1484,12 +1721,41 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
"changeType": "UPSERT",
- "aspectName": "container",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/",
+ "name": "TABLE_2",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "typeNames": [
+ "Table"
+ ]
}
},
"systemMetadata": {
@@ -1500,7 +1766,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -1518,19 +1784,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
},
{
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -1543,12 +1813,25 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "removed": false
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ ]
}
},
"systemMetadata": {
@@ -1559,12 +1842,76 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)",
"changeType": "UPSERT",
- "aspectName": "schemaMetadata",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_6",
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "test_db.test_schema.table_10",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -1723,93 +2070,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
- "changeType": "UPSERT",
- "aspectName": "datasetProperties",
- "aspect": {
- "json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/",
- "name": "TABLE_6",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
- "changeType": "UPSERT",
- "aspectName": "subTypes",
- "aspect": {
- "json": {
- "typeNames": [
- "Table"
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
- {
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -1825,12 +2086,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_7",
+ "schemaName": "test_db.test_schema.table_6",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -1989,15 +2250,48 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/",
- "name": "TABLE_7",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7",
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/",
+ "name": "TABLE_6",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6",
"description": "Comment for Table",
"created": {
"time": 1623110400000
@@ -2016,12 +2310,29 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -2032,7 +2343,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -2050,19 +2361,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
},
{
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -2075,7 +2390,40 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2091,12 +2439,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_8",
+ "schemaName": "test_db.test_schema.table_4",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -2255,15 +2603,15 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/",
- "name": "TABLE_8",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8",
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/",
+ "name": "TABLE_7",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7",
"description": "Comment for Table",
"created": {
"time": 1623110400000
@@ -2282,12 +2630,39 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/",
+ "name": "TABLE_4",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
}
},
"systemMetadata": {
@@ -2298,7 +2673,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
@@ -2316,19 +2691,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
},
{
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -2341,12 +2720,43 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
"changeType": "UPSERT",
- "aspectName": "status",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "removed": false
+ "typeNames": [
+ "Table"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ ]
}
},
"systemMetadata": {
@@ -2357,12 +2767,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_9",
+ "schemaName": "test_db.test_schema.table_7",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -2521,93 +2931,7 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
- "changeType": "UPSERT",
- "aspectName": "datasetProperties",
- "aspect": {
- "json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/",
- "name": "TABLE_9",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
- "changeType": "UPSERT",
- "aspectName": "subTypes",
- "aspect": {
- "json": {
- "typeNames": [
- "Table"
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
- "changeType": "UPSERT",
- "aspectName": "browsePathsV2",
- "aspect": {
- "json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
- {
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- ]
- }
- },
- "systemMetadata": {
- "lastObserved": 1654621200000,
- "runId": "snowflake-2022_06_07-17_00_00",
- "lastRunId": "no-run-id-provided"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -2623,12 +2947,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
"aspectName": "schemaMetadata",
"aspect": {
"json": {
- "schemaName": "test_db.test_schema.table_10",
+ "schemaName": "test_db.test_schema.table_8",
"platform": "urn:li:dataPlatform:snowflake",
"version": 0,
"created": {
@@ -2787,23 +3111,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)",
"changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "customProperties": {},
- "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/",
- "name": "TABLE_10",
- "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10",
- "description": "Comment for Table",
- "created": {
- "time": 1623110400000
- },
- "lastModified": {
- "time": 1623110400000
- },
- "tags": []
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -2814,12 +3128,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
},
"systemMetadata": {
@@ -2830,14 +3144,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
- "aspectName": "subTypes",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "typeNames": [
- "Table"
- ]
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/",
+ "name": "TABLE_8",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
}
},
"systemMetadata": {
@@ -2848,21 +3171,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)",
"changeType": "UPSERT",
- "aspectName": "browsePathsV2",
+ "aspectName": "container",
"aspect": {
"json": {
- "path": [
- {
- "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585",
- "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585"
- },
- {
- "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c",
- "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c"
- }
- ]
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
},
"systemMetadata": {
@@ -2873,21 +3187,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
- ]
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -2898,20 +3204,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
+ "typeNames": [
+ "Table"
]
}
},
@@ -2923,19 +3222,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "upstreams": [
+ "path": [
{
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -2948,21 +3251,12 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "status",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
- ]
+ "removed": false
}
},
"systemMetadata": {
@@ -2973,21 +3267,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "dataPlatformInstance",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
- ]
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
}
},
"systemMetadata": {
@@ -2998,21 +3284,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
- ]
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/",
+ "name": "TABLE_9",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9",
+ "description": "Comment for Table",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
}
},
"systemMetadata": {
@@ -3023,20 +3311,13 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "subTypes",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
+ "typeNames": [
+ "Table"
]
}
},
@@ -3048,19 +3329,23 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "browsePathsV2",
"aspect": {
"json": {
- "upstreams": [
+ "path": [
{
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
}
]
}
@@ -3073,20 +3358,300 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
"changeType": "UPSERT",
- "aspectName": "upstreamLineage",
+ "aspectName": "schemaMetadata",
"aspect": {
"json": {
- "upstreams": [
- {
- "auditStamp": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
- }
+ "schemaName": "test_db.test_schema.table_9",
+ "platform": "urn:li:dataPlatform:snowflake",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "col_1",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "NUMBER(38,0)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_2",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_3",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_4",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_5",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_6",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_7",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_8",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_9",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_10",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "viewProperties",
+ "aspect": {
+ "json": {
+ "materialized": false,
+ "viewLogic": "create view view_2 as select * from table_2",
+ "viewLanguage": "SQL"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:snowflake",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/",
+ "name": "VIEW_2",
+ "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2",
+ "description": "Comment for View",
+ "created": {
+ "time": 1623110400000
+ },
+ "lastModified": {
+ "time": 1623110400000
+ },
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "View"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)",
+ "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)"
+ },
+ {
+ "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab",
+ "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab"
+ },
+ {
+ "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f",
+ "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
]
}
},
@@ -3098,7 +3663,187 @@
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "test_db.test_schema.view_2",
+ "platform": "urn:li:dataPlatform:snowflake",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.MySqlDDL": {
+ "tableSchema": ""
+ }
+ },
+ "fields": [
+ {
+ "fieldPath": "col_1",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "NUMBER(38,0)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_2",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_3",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_4",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_5",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_6",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_7",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_8",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_9",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "col_10",
+ "nullable": false,
+ "description": "Comment for column",
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "VARCHAR(255)",
+ "recursive": false,
+ "isPartOfKey": false
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "snowflake-2022_06_07-17_00_00",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)",
"changeType": "UPSERT",
"aspectName": "upstreamLineage",
"aspect": {
@@ -3109,8 +3854,120 @@
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
- "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)",
- "type": "TRANSFORMED"
+ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)",
+ "type": "VIEW"
+ }
+ ],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_1)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_1)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_10)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_10)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_2)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_2)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_3)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_3)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_4)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_4)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_5)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_5)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_6)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_6)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_7)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_7)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_8)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_8)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_9)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_9)"
+ ],
+ "confidenceScore": 1.0
}
]
}
diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
index 3dafe85ef950a..4c00e48ede9fb 100644
--- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
+++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py
@@ -211,11 +211,12 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_
include_technical_schema=True,
include_table_lineage=True,
include_column_lineage=False,
- include_views=False,
- include_view_lineage=False,
+ include_views=True,
+ include_view_lineage=True,
include_usage_stats=False,
incremental_lineage=False,
include_operational_stats=False,
+ platform_instance="instance1",
start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(
tzinfo=timezone.utc
),
From 4d2c009d400406b3cc41767864b07e9933dfe841 Mon Sep 17 00:00:00 2001
From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com>
Date: Sat, 28 Oct 2023 01:32:11 +0530
Subject: [PATCH 015/792] feat: Add flag to hide/display the autocomplete query
for search bar (#9104)
Co-authored-by: John Joyce
---
datahub-web-react/src/app/home/HomePageHeader.tsx | 1 +
datahub-web-react/src/app/search/SearchBar.tsx | 6 ++++--
datahub-web-react/src/app/search/SearchHeader.tsx | 1 +
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx
index 5919d2dbf5b7e..e5c01252a865b 100644
--- a/datahub-web-react/src/app/home/HomePageHeader.tsx
+++ b/datahub-web-react/src/app/home/HomePageHeader.tsx
@@ -275,6 +275,7 @@ export const HomePageHeader = () => {
viewsEnabled={viewsEnabled}
combineSiblings
showQuickFilters
+ showViewAllResults
/>
{searchResultsToShow && searchResultsToShow.length > 0 && (
diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx
index b4699994bc460..5f797e68fe0e8 100644
--- a/datahub-web-react/src/app/search/SearchBar.tsx
+++ b/datahub-web-react/src/app/search/SearchBar.tsx
@@ -119,6 +119,7 @@ interface Props {
setIsSearchBarFocused?: (isSearchBarFocused: boolean) => void;
onFocus?: () => void;
onBlur?: () => void;
+ showViewAllResults?: boolean;
}
const defaultProps = {
@@ -146,6 +147,7 @@ export const SearchBar = ({
setIsSearchBarFocused,
onFocus,
onBlur,
+ showViewAllResults = false,
}: Props) => {
const history = useHistory();
const [searchQuery, setSearchQuery] = useState(initialQuery);
@@ -203,7 +205,7 @@ export const SearchBar = ({
const { quickFilters, selectedQuickFilter, setSelectedQuickFilter } = useQuickFiltersContext();
const autoCompleteQueryOptions = useMemo(() => {
- if (effectiveQuery === '') return [];
+ if (effectiveQuery === '' || !showViewAllResults) return [];
return [
{
@@ -212,7 +214,7 @@ export const SearchBar = ({
type: EXACT_AUTOCOMPLETE_OPTION_TYPE,
},
];
- }, [effectiveQuery]);
+ }, [effectiveQuery, showViewAllResults]);
const autoCompleteEntityOptions = useMemo(() => {
return suggestions.map((suggestion: AutoCompleteResultForEntity) => {
diff --git a/datahub-web-react/src/app/search/SearchHeader.tsx b/datahub-web-react/src/app/search/SearchHeader.tsx
index 74bc562e275d1..91f9753a3d601 100644
--- a/datahub-web-react/src/app/search/SearchHeader.tsx
+++ b/datahub-web-react/src/app/search/SearchHeader.tsx
@@ -107,6 +107,7 @@ export const SearchHeader = ({
combineSiblings
fixAutoComplete
showQuickFilters
+ showViewAllResults
/>
From aceff13ebb2d6758a5e42b592f4b5eb7d5af29e3 Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Sat, 28 Oct 2023 04:55:57 +0530
Subject: [PATCH 016/792] docs(timeline): correct markdown heading level
(#9126)
---
docs/dev-guides/timeline.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/dev-guides/timeline.md b/docs/dev-guides/timeline.md
index 829aef1d3eefa..6a8e158d40ebf 100644
--- a/docs/dev-guides/timeline.md
+++ b/docs/dev-guides/timeline.md
@@ -228,7 +228,7 @@ http://localhost:8080/openapi/timeline/v1/urn%3Ali%3Adataset%3A%28urn%3Ali%3Adat
REMOVE GLOSSARY_TERM dataset:hive:testTimelineDataset (urn:li:glossaryTerm:SavingsAccount): The GlossaryTerm 'SavingsAccount' for the entity 'urn:li:dataset:(urn:li:dataPlatform:hive,testTimelineDataset,PROD)' has been removed.
```
-# Explore the API
+## Explore the API
The API is browse-able via the UI through the dropdown.
Here are a few screenshots showing how to navigate to it. You can try out the API and send example requests.
@@ -243,7 +243,7 @@ Here are a few screenshots showing how to navigate to it. You can try out the AP
-# Future Work
+## Future Work
- Supporting versions as start and end parameters as part of the call to the timeline API
- Supporting entities beyond Datasets
From 9ae0e93d82eac2040af2c3d23d52878e57e19df1 Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Fri, 27 Oct 2023 20:18:31 -0700
Subject: [PATCH 017/792] docs(graphql): Correct mutation -> query for
searchAcrossLineage examples (#9134)
---
docs/api/tutorials/lineage.md | 8 ++------
metadata-ingestion/examples/library/read_lineage_rest.py | 2 +-
2 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/docs/api/tutorials/lineage.md b/docs/api/tutorials/lineage.md
index 4baad09099d07..13ec716b7870b 100644
--- a/docs/api/tutorials/lineage.md
+++ b/docs/api/tutorials/lineage.md
@@ -113,12 +113,10 @@ Expected Response:
You can now see the lineage between `fct_users_deleted` and `logging_events`.
-
-
## Add Column-level Lineage
@@ -135,12 +133,10 @@ You can now see the lineage between `fct_users_deleted` and `logging_events`.
You can now see the column-level lineage between datasets. Note that you have to enable `Show Columns` to be able to see the column-level lineage.
-
-
## Read Lineage
@@ -180,7 +176,7 @@ query searchAcrossLineage {
}
```
-This example shows using lineage degrees as a filter, but additional search filters can be included here as well.
+This example shows using lineage degrees as a filter, but additional search filters can be included here as well.
@@ -188,7 +184,7 @@ This example shows using lineage degrees as a filter, but additional search filt
```shell
curl --location --request POST 'http://localhost:8080/api/graphql' \
--header 'Authorization: Bearer ' \
---header 'Content-Type: application/json' --data-raw '{ { "query": "mutation searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}"
+--header 'Content-Type: application/json' --data-raw '{ { "query": "query searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}"
}}'
```
diff --git a/metadata-ingestion/examples/library/read_lineage_rest.py b/metadata-ingestion/examples/library/read_lineage_rest.py
index 34437ed86280d..bd9b4e8651dba 100644
--- a/metadata-ingestion/examples/library/read_lineage_rest.py
+++ b/metadata-ingestion/examples/library/read_lineage_rest.py
@@ -6,7 +6,7 @@
# Query multiple aspects from entity
query = """
-mutation searchAcrossLineage {
+query searchAcrossLineage {
searchAcrossLineage(
input: {
query: "*"
From 3f4ab44a91bff734e0a0437622d7579410875ec5 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Sun, 29 Oct 2023 16:26:05 -0500
Subject: [PATCH 018/792] feat(kafka): increase kafka message size and enable
compression (#9038)
Co-authored-by: Pedro Silva
---
.github/workflows/docker-unified.yml | 10 +++-
build.gradle | 2 +-
.../app/client/KafkaTrackingProducer.java | 10 ++--
.../app/config/ConfigurationProvider.java | 6 ++-
docker/broker/env/docker.env | 4 +-
docker/datahub-frontend/Dockerfile | 4 +-
docker/datahub-gms/Dockerfile | 4 +-
docker/datahub-mae-consumer/Dockerfile | 4 +-
docker/datahub-mce-consumer/Dockerfile | 4 +-
docker/datahub-upgrade/Dockerfile | 4 +-
docker/kafka-setup/kafka-config.sh | 2 +
docker/kafka-setup/kafka-setup.sh | 46 ++++++++++++-------
docker/kafka-setup/kafka-topic-workers.sh | 10 +++-
.../docker-compose-m1.quickstart.yml | 2 +
...er-compose-without-neo4j-m1.quickstart.yml | 2 +
...ocker-compose-without-neo4j.quickstart.yml | 2 +
.../quickstart/docker-compose.quickstart.yml | 2 +
docs/deploy/environment-vars.md | 22 +++++----
.../config/kafka/ConsumerConfiguration.java | 10 ++++
.../config/kafka/KafkaConfiguration.java | 2 +
.../config/kafka/ProducerConfiguration.java | 4 ++
.../src/main/resources/application.yml | 4 ++
.../kafka/DataHubKafkaProducerFactory.java | 2 +
.../kafka/KafkaEventConsumerFactory.java | 4 ++
.../kafka/SimpleKafkaConsumerFactory.java | 9 +++-
25 files changed, 135 insertions(+), 40 deletions(-)
create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index 8666a5e2e2171..5f5a62de6288c 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -851,8 +851,14 @@ jobs:
if: failure()
run: |
docker ps -a
- docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log
- docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log
+ docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true
+ docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true
+ docker logs datahub-mae-consumer >& mae-${{ matrix.test_strategy }}.log || true
+ docker logs datahub-mce-consumer >& mce-${{ matrix.test_strategy }}.log || true
+ docker logs broker >& broker-${{ matrix.test_strategy }}.log || true
+ docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true
+ docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true
+ docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true
- name: Upload logs
uses: actions/upload-artifact@v3
if: failure()
diff --git a/build.gradle b/build.gradle
index cf55a59cfe694..bd282535fa13c 100644
--- a/build.gradle
+++ b/build.gradle
@@ -39,7 +39,7 @@ buildscript {
plugins {
id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2'
id 'com.github.johnrengelman.shadow' version '6.1.0'
- id 'com.palantir.docker' version '0.35.0'
+ id 'com.palantir.docker' version '0.35.0' apply false
// https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/
// TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0"
}
diff --git a/datahub-frontend/app/client/KafkaTrackingProducer.java b/datahub-frontend/app/client/KafkaTrackingProducer.java
index fab17f9215d4a..59e91a6d5a0f7 100644
--- a/datahub-frontend/app/client/KafkaTrackingProducer.java
+++ b/datahub-frontend/app/client/KafkaTrackingProducer.java
@@ -1,6 +1,8 @@
package client;
+import com.linkedin.metadata.config.kafka.ProducerConfiguration;
import com.typesafe.config.Config;
+import config.ConfigurationProvider;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
@@ -35,12 +37,12 @@ public class KafkaTrackingProducer {
private final KafkaProducer _producer;
@Inject
- public KafkaTrackingProducer(@Nonnull Config config, ApplicationLifecycle lifecycle) {
+ public KafkaTrackingProducer(@Nonnull Config config, ApplicationLifecycle lifecycle, final ConfigurationProvider configurationProvider) {
_isEnabled = !config.hasPath("analytics.enabled") || config.getBoolean("analytics.enabled");
if (_isEnabled) {
_logger.debug("Analytics tracking is enabled");
- _producer = createKafkaProducer(config);
+ _producer = createKafkaProducer(config, configurationProvider.getKafka().getProducer());
lifecycle.addStopHook(
() -> {
@@ -62,13 +64,15 @@ public void send(ProducerRecord record) {
_producer.send(record);
}
- private static KafkaProducer createKafkaProducer(Config config) {
+ private static KafkaProducer createKafkaProducer(Config config, ProducerConfiguration producerConfiguration) {
final Properties props = new Properties();
props.put(ProducerConfig.CLIENT_ID_CONFIG, "datahub-frontend");
props.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, config.getString("analytics.kafka.delivery.timeout.ms"));
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("analytics.kafka.bootstrap.server"));
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // Actor urn.
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // JSON object.
+ props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, producerConfiguration.getMaxRequestSize());
+ props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, producerConfiguration.getCompressionType());
final String securityProtocolConfig = "analytics.kafka.security.protocol";
if (config.hasPath(securityProtocolConfig)
diff --git a/datahub-frontend/app/config/ConfigurationProvider.java b/datahub-frontend/app/config/ConfigurationProvider.java
index 00a5472ec3476..8f526c831b5c9 100644
--- a/datahub-frontend/app/config/ConfigurationProvider.java
+++ b/datahub-frontend/app/config/ConfigurationProvider.java
@@ -1,6 +1,7 @@
package config;
import com.linkedin.metadata.config.cache.CacheConfiguration;
+import com.linkedin.metadata.config.kafka.KafkaConfiguration;
import com.linkedin.metadata.spring.YamlPropertySourceFactory;
import lombok.Data;
@@ -11,7 +12,6 @@
/**
* Minimal sharing between metadata-service and frontend
- * Initially for use of client caching configuration.
* Does not use the factories module to avoid transitive dependencies.
*/
@EnableConfigurationProperties
@@ -19,6 +19,10 @@
@ConfigurationProperties
@Data
public class ConfigurationProvider {
+ /**
+ * Kafka related configs.
+ */
+ private KafkaConfiguration kafka;
/**
* Configuration for caching
diff --git a/docker/broker/env/docker.env b/docker/broker/env/docker.env
index 18115697c2832..6eb958609daf1 100644
--- a/docker/broker/env/docker.env
+++ b/docker/broker/env/docker.env
@@ -5,4 +5,6 @@ KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
-KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
\ No newline at end of file
+KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
+KAFKA_MESSAGE_MAX_BYTES=5242880
+KAFKA_MAX_MESSAGE_BYTES=5242880
\ No newline at end of file
diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile
index 9efc0d2ce8753..9c13e73078042 100644
--- a/docker/datahub-frontend/Dockerfile
+++ b/docker/datahub-frontend/Dockerfile
@@ -8,10 +8,12 @@ RUN addgroup -S datahub && adduser -S datahub -G datahub
# Upgrade Alpine and base packages
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
- && apk --no-cache add curl sqlite \
+ && apk --no-cache add curl sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/
+ENV LD_LIBRARY_PATH="/lib:/lib64"
+
FROM base as prod-install
COPY ./datahub-frontend.zip /
diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile
index f5428f7480403..e271188a703cc 100644
--- a/docker/datahub-gms/Dockerfile
+++ b/docker/datahub-gms/Dockerfile
@@ -18,7 +18,7 @@ FROM alpine:3 AS base
ENV JMX_VERSION=0.18.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
- && apk --no-cache add curl bash coreutils gcompat sqlite \
+ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \
@@ -29,6 +29,8 @@ RUN apk --no-cache --update-cache --available upgrade \
&& cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
COPY --from=binary /go/bin/dockerize /usr/local/bin
+ENV LD_LIBRARY_PATH="/lib:/lib64"
+
FROM base as prod-install
COPY war.war /datahub/datahub-gms/bin/war.war
COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml
diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile
index 4b321b1639c1b..ec3da4de71d15 100644
--- a/docker/datahub-mae-consumer/Dockerfile
+++ b/docker/datahub-mae-consumer/Dockerfile
@@ -18,7 +18,7 @@ FROM alpine:3 AS base
ENV JMX_VERSION=0.18.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
- && apk --no-cache add curl bash coreutils sqlite \
+ && apk --no-cache add curl bash coreutils sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \
&& wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
@@ -26,6 +26,8 @@ RUN apk --no-cache --update-cache --available upgrade \
&& cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
COPY --from=binary /go/bin/dockerize /usr/local/bin
+ENV LD_LIBRARY_PATH="/lib:/lib64"
+
FROM base as prod-install
COPY mae-consumer-job.jar /datahub/datahub-mae-consumer/bin/
COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mae-consumer/resources/entity-registry.yml
diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile
index 4d38ee6daa235..f9c47f77a98f5 100644
--- a/docker/datahub-mce-consumer/Dockerfile
+++ b/docker/datahub-mce-consumer/Dockerfile
@@ -18,7 +18,7 @@ FROM alpine:3 AS base
ENV JMX_VERSION=0.18.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
- && apk --no-cache add curl bash sqlite \
+ && apk --no-cache add curl bash sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \
&& wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
@@ -33,6 +33,8 @@ COPY docker/datahub-mce-consumer/start.sh /datahub/datahub-mce-consumer/scripts/
COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mce-consumer/scripts/prometheus-config.yaml
RUN chmod +x /datahub/datahub-mce-consumer/scripts/start.sh
+ENV LD_LIBRARY_PATH="/lib:/lib64"
+
FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134
diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile
index 945be54678a24..f08e7268e4018 100644
--- a/docker/datahub-upgrade/Dockerfile
+++ b/docker/datahub-upgrade/Dockerfile
@@ -18,7 +18,7 @@ FROM alpine:3 AS base
ENV JMX_VERSION=0.18.0
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
- && apk --no-cache add curl bash coreutils gcompat sqlite \
+ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \
&& apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \
&& curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \
@@ -28,6 +28,8 @@ RUN apk --no-cache --update-cache --available upgrade \
&& cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
COPY --from=binary /go/bin/dockerize /usr/local/bin
+ENV LD_LIBRARY_PATH="/lib:/lib64"
+
FROM base as prod-install
COPY datahub-upgrade.jar /datahub/datahub-upgrade/bin/
COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml
diff --git a/docker/kafka-setup/kafka-config.sh b/docker/kafka-setup/kafka-config.sh
index 2ba8e2d7c5d47..4d5698ccc3856 100644
--- a/docker/kafka-setup/kafka-config.sh
+++ b/docker/kafka-setup/kafka-config.sh
@@ -2,6 +2,7 @@
: ${PARTITIONS:=1}
: ${REPLICATION_FACTOR:=1}
+: ${MAX_MESSAGE_BYTES:=5242880}
: ${KAFKA_PROPERTIES_SECURITY_PROTOCOL:=PLAINTEXT}
@@ -12,3 +13,4 @@ export KAFKA_HEAP_OPTS="-Xmx64M"
CONNECTION_PROPERTIES_PATH=/tmp/connection.properties
WORKERS=4
+DELIMITER=";"
diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh
index b5024e49e59f1..439ffb4d4d829 100755
--- a/docker/kafka-setup/kafka-setup.sh
+++ b/docker/kafka-setup/kafka-setup.sh
@@ -102,24 +102,43 @@ exec 4<&-
send() {
work_id=$1
topic_args=$2
- echo sending $work_id $topic_args
- echo "$work_id" "$topic_args" 1>&3 ## the fifo is fd 3
+ topic_config=$3 # optional third argument; empty when the caller passes only two arguments
+
+ echo -e "sending $work_id\n worker_args: ${topic_args}${DELIMITER}${topic_config}"
+ echo "$work_id" "${topic_args}${DELIMITER}${topic_config}" 1>&3 ## the fifo is fd 3; topic_args and topic_config travel as one string joined by DELIMITER
}
## Produce the jobs to run.
-send "$METADATA_AUDIT_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_AUDIT_EVENT_NAME"
-send "$METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_EVENT_NAME"
-send "$FAILED_METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_EVENT_NAME"
-send "$METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME"
+send "$METADATA_AUDIT_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_AUDIT_EVENT_NAME" \
+ "--entity-type topics --entity-name $METADATA_AUDIT_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+
+send "$METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_EVENT_NAME" \
+ "--entity-type topics --entity-name $METADATA_CHANGE_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+send "$FAILED_METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_EVENT_NAME" \
+ "--entity-type topics --entity-name $FAILED_METADATA_CHANGE_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+
+send "$METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" \
+ "--entity-type topics --entity-name $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
# Set retention to 90 days
-send "$METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" "--partitions $PARTITIONS --config retention.ms=7776000000 --topic $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME"
-send "$METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_PROPOSAL_TOPIC_NAME"
-send "$FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME"
-send "$PLATFORM_EVENT_TOPIC_NAME" "--partitions $PARTITIONS --topic $PLATFORM_EVENT_TOPIC_NAME"
+send "$METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" "--partitions $PARTITIONS --config retention.ms=7776000000 --topic $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" \
+ "--entity-type topics --entity-name $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+
+send "$METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_PROPOSAL_TOPIC_NAME" \
+ "--entity-type topics --entity-name $METADATA_CHANGE_PROPOSAL_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+send "$FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" \
+ "--entity-type topics --entity-name $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
+
+send "$PLATFORM_EVENT_TOPIC_NAME" "--partitions $PARTITIONS --topic $PLATFORM_EVENT_TOPIC_NAME" \
+ "--entity-type topics --entity-name $PLATFORM_EVENT_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES"
# Infinite retention upgrade topic
-send "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" "--partitions 1 --config retention.ms=-1 --topic $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME"
+ # Make sure the retention.ms config for $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME is configured to infinite
+ # Please see the bug report below for details
+ # https://github.com/datahub-project/datahub/issues/7882
+send "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" "--partitions 1 --config retention.ms=-1 --topic $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" \
+ "--entity-type topics --entity-name "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" --alter --add-config retention.ms=-1"
+
# Create topic for datahub usage event
if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then
send "$DATAHUB_USAGE_EVENT_NAME" "--partitions $PARTITIONS --topic $DATAHUB_USAGE_EVENT_NAME"
@@ -150,8 +169,3 @@ if [[ $USE_CONFLUENT_SCHEMA_REGISTRY == "TRUE" ]]; then
--entity-name _schemas \
--alter --add-config cleanup.policy=compact
fi
-
-# Make sure the retention.ms config for $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME is configured to infinite
-# Please see the bug report below for details
-# https://github.com/datahub-project/datahub/issues/7882
-kafka-configs.sh --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER --entity-type topics --entity-name "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" --alter --add-config retention.ms=-1
diff --git a/docker/kafka-setup/kafka-topic-workers.sh b/docker/kafka-setup/kafka-topic-workers.sh
index fd0d45c3f4611..3ddf41abbabf5 100644
--- a/docker/kafka-setup/kafka-topic-workers.sh
+++ b/docker/kafka-setup/kafka-topic-workers.sh
@@ -11,10 +11,18 @@ START_LOCK=$4
## the queue workers are supposed to be doing
job() {
i=$1
- topic_args=$2
+ worker_args=$2 # packed as "<topic_args>${DELIMITER}<topic_config>"
+ topic_args=$(echo $worker_args | cut -d "$DELIMITER" -f 1) # field 1: arguments appended to kafka-topics.sh --create
+ topic_config=$(echo $worker_args | cut -d "$DELIMITER" -f 2) # field 2: arguments for kafka-configs.sh; may be empty
+
+ echo " $i: kafka-topics.sh --create --if-not-exists $topic_args"
kafka-topics.sh --create --if-not-exists --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER \
--replication-factor $REPLICATION_FACTOR \
$topic_args
+ if [[ ! -z "$topic_config" ]]; then # only run kafka-configs.sh when a config string was supplied
+ echo " $i: kafka-configs.sh $topic_config"
+ kafka-configs.sh --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER $topic_config
+ fi
}
## This is the worker to read from the queue.
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index 89e9aaa0defd6..c5de687d335b9 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -16,6 +16,8 @@ services:
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
+ - KAFKA_MESSAGE_MAX_BYTES=5242880
+ - KAFKA_MAX_MESSAGE_BYTES=5242880
healthcheck:
interval: 1s
retries: 5
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index f6284edc83648..b6935f24c5ce2 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -16,6 +16,8 @@ services:
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
+ - KAFKA_MESSAGE_MAX_BYTES=5242880
+ - KAFKA_MAX_MESSAGE_BYTES=5242880
healthcheck:
interval: 1s
retries: 5
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index 4e3503e35c0db..4ff8bbd70da85 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -16,6 +16,8 @@ services:
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
+ - KAFKA_MESSAGE_MAX_BYTES=5242880
+ - KAFKA_MAX_MESSAGE_BYTES=5242880
healthcheck:
interval: 1s
retries: 5
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index e2f52064389e0..f2950ebab2c9d 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -16,6 +16,8 @@ services:
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
- KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
+ - KAFKA_MESSAGE_MAX_BYTES=5242880
+ - KAFKA_MAX_MESSAGE_BYTES=5242880
healthcheck:
interval: 1s
retries: 5
diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md
index 779c3d3d7c432..4c7b249349ca0 100644
--- a/docs/deploy/environment-vars.md
+++ b/docs/deploy/environment-vars.md
@@ -67,15 +67,19 @@ In general, there are **lots** of Kafka configuration environment variables for
These environment variables follow the standard Spring representation of properties as environment variables.
Simply replace the dot, `.`, with an underscore, `_`, and convert to uppercase.
-| Variable | Default | Unit/Type | Components | Description |
-|-----------------------------------------------------|----------------------------------------------|-----------|-----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `KAFKA_LISTENER_CONCURRENCY` | 1 | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Number of Kafka consumer threads. Optimize throughput by matching to topic partitions. |
-| `SPRING_KAFKA_PRODUCER_PROPERTIES_MAX_REQUEST_SIZE` | 1048576 | bytes | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. |
-| `SCHEMA_REGISTRY_TYPE` | `INTERNAL` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry implementation. One of `INTERNAL` or `KAFKA` or `AWS_GLUE` |
-| `KAFKA_SCHEMAREGISTRY_URL` | `http://localhost:8080/schema-registry/api/` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry url. Used for `INTERNAL` and `KAFKA`. The default value is for the `GMS` component. The `MCE Consumer` and `MAE Consumer` should be the `GMS` hostname and port. |
-| `AWS_GLUE_SCHEMA_REGISTRY_REGION` | `us-east-1` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry implementation. |
-| `AWS_GLUE_SCHEMA_REGISTRY_NAME` | `` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry. |
-| `USE_CONFLUENT_SCHEMA_REGISTRY` | `true` | boolean | [`kafka-setup`] | Enable Confluent schema registry configuration. |
+| Variable | Default | Unit/Type | Components | Description |
+|-----------------------------------------------------|----------------------------------------------|-----------|--------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `KAFKA_LISTENER_CONCURRENCY` | 1 | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Number of Kafka consumer threads. Optimize throughput by matching to topic partitions. |
+| `SPRING_KAFKA_PRODUCER_PROPERTIES_MAX_REQUEST_SIZE` | 1048576 | bytes | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. |
+| `SCHEMA_REGISTRY_TYPE` | `INTERNAL` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry implementation. One of `INTERNAL` or `KAFKA` or `AWS_GLUE` |
+| `KAFKA_SCHEMAREGISTRY_URL` | `http://localhost:8080/schema-registry/api/` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry url. Used for `INTERNAL` and `KAFKA`. The default value is for the `GMS` component. The `MCE Consumer` and `MAE Consumer` should be the `GMS` hostname and port. |
+| `AWS_GLUE_SCHEMA_REGISTRY_REGION` | `us-east-1` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry implementation. |
+| `AWS_GLUE_SCHEMA_REGISTRY_NAME` | `` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry. |
+| `USE_CONFLUENT_SCHEMA_REGISTRY` | `true` | boolean | [`kafka-setup`] | Enable Confluent schema registry configuration. |
+| `KAFKA_PRODUCER_MAX_REQUEST_SIZE` | `5242880` | integer | [`Frontend`, `GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. |
+| `KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES` | `5242880` | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | The maximum amount of data per-partition the server will return. Records are fetched in batches by the consumer. If the first record batch in the first non-empty partition of the fetch is larger than this limit, the batch will still be returned to ensure that the consumer can make progress. |
+| `MAX_MESSAGE_BYTES` | `5242880` | integer | [`kafka-setup`] | Sets the max message size on the Kafka topics. |
+| `KAFKA_PRODUCER_COMPRESSION_TYPE` | `snappy` | string | [`Frontend`, `GMS`, `MCE Consumer`, `MAE Consumer`] | The compression used by the producer. |
## Frontend
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
new file mode 100644
index 0000000000000..7a93119226a2d
--- /dev/null
+++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java
@@ -0,0 +1,10 @@
+package com.linkedin.metadata.config.kafka;
+
+import lombok.Data;
+
+
+@Data
+public class ConsumerConfiguration { // Kafka consumer settings; held as the `consumer` field of KafkaConfiguration
+
+ private int maxPartitionFetchBytes; // max bytes fetched per partition; the consumer factories pass it as ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG
+}
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java
index 2966abfc63396..2345f88352c17 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java
@@ -12,4 +12,6 @@ public class KafkaConfiguration {
private SchemaRegistryConfiguration schemaRegistry;
private ProducerConfiguration producer;
+
+ private ConsumerConfiguration consumer;
}
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java
index 2bf4cea3f0c18..26a8c6b649133 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java
@@ -13,4 +13,8 @@ public class ProducerConfiguration {
private int requestTimeout;
private int backoffTimeout;
+
+ private String compressionType;
+
+ private int maxRequestSize;
}
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index 5d72e24748072..b817208672e08 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -228,6 +228,10 @@ kafka:
deliveryTimeout: ${KAFKA_PRODUCER_DELIVERY_TIMEOUT:30000}
requestTimeout: ${KAFKA_PRODUCER_REQUEST_TIMEOUT:3000}
backoffTimeout: ${KAFKA_PRODUCER_BACKOFF_TIMEOUT:500}
+ compressionType: ${KAFKA_PRODUCER_COMPRESSION_TYPE:snappy} # producer's compression algorithm
+ maxRequestSize: ${KAFKA_PRODUCER_MAX_REQUEST_SIZE:5242880} # the max bytes sent by the producer, also see kafka-setup MAX_MESSAGE_BYTES for matching value
+ consumer:
+ maxPartitionFetchBytes: ${KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES:5242880} # the max bytes consumed per partition
schemaRegistry:
type: ${SCHEMA_REGISTRY_TYPE:KAFKA} # INTERNAL or KAFKA or AWS_GLUE
url: ${KAFKA_SCHEMAREGISTRY_URL:http://localhost:8081}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java
index c67a2e704681f..78b3de501e0e5 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java
@@ -59,6 +59,8 @@ public static Map buildProducerProperties(SchemaRegistryConfig s
props.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, kafkaConfiguration.getProducer().getDeliveryTimeout());
props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, kafkaConfiguration.getProducer().getRequestTimeout());
props.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, kafkaConfiguration.getProducer().getBackoffTimeout());
+ props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, kafkaConfiguration.getProducer().getCompressionType());
+ props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, kafkaConfiguration.getProducer().getMaxRequestSize());
// Override KafkaProperties with SchemaRegistryConfig only for non-empty values
schemaRegistryConfig.getProperties().entrySet()
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
index ba18be6834d14..7a9e80781d639 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java
@@ -70,6 +70,7 @@ private static Map buildCustomizedProperties(KafkaProperties bas
consumerProps.setEnableAutoCommit(true);
consumerProps.setAutoCommitInterval(Duration.ofSeconds(10));
+
// KAFKA_BOOTSTRAP_SERVER has precedence over SPRING_KAFKA_BOOTSTRAP_SERVERS
if (kafkaConfiguration.getBootstrapServers() != null && kafkaConfiguration.getBootstrapServers().length() > 0) {
consumerProps.setBootstrapServers(Arrays.asList(kafkaConfiguration.getBootstrapServers().split(",")));
@@ -84,6 +85,9 @@ private static Map buildCustomizedProperties(KafkaProperties bas
.filter(entry -> entry.getValue() != null && !entry.getValue().toString().isEmpty())
.forEach(entry -> customizedProperties.put(entry.getKey(), entry.getValue()));
+ customizedProperties.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG,
+ kafkaConfiguration.getConsumer().getMaxPartitionFetchBytes());
+
return customizedProperties;
}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java
index 05ebfdddf8b80..e12cbec87fe45 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java
@@ -4,8 +4,11 @@
import com.linkedin.gms.factory.config.ConfigurationProvider;
import java.time.Duration;
import java.util.Arrays;
+import java.util.Map;
+
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
@@ -40,10 +43,14 @@ protected KafkaListenerContainerFactory> createInstance(@Qualifier("configurat
consumerProps.setBootstrapServers(Arrays.asList(kafkaConfiguration.getBootstrapServers().split(",")));
} // else we rely on KafkaProperties which defaults to localhost:9092
+ Map customizedProperties = consumerProps.buildProperties();
+ customizedProperties.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG,
+ kafkaConfiguration.getConsumer().getMaxPartitionFetchBytes());
+
ConcurrentKafkaListenerContainerFactory factory =
new ConcurrentKafkaListenerContainerFactory<>();
factory.setContainerCustomizer(new ThreadPoolContainerCustomizer());
- factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(properties.buildConsumerProperties()));
+ factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(customizedProperties));
log.info("Simple KafkaListenerContainerFactory built successfully");
From 758ed47644b330efbbee4e61dde71f6ff5808e23 Mon Sep 17 00:00:00 2001
From: Dmytro Kulyk <34435869+KulykDmytro@users.noreply.github.com>
Date: Mon, 30 Oct 2023 06:14:32 +0200
Subject: [PATCH 019/792] feat(ingest/jsonschema) enable schema-aware
`JsonSchemaTranslator` (#8971)
Co-authored-by: Harshal Sheth
---
.../src/datahub/ingestion/extractor/json_schema_util.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
index c943b83a887ed..360ddf1129154 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
@@ -598,7 +598,8 @@ def get_fields_from_schema(
jsonref_schema_dict = schema_dict
else:
# first validate the schema using a json validator
- jsonschema.Draft7Validator.check_schema(schema_dict)
+ validator = jsonschema.validators.validator_for(schema_dict)
+ validator.check_schema(schema_dict)
# then apply jsonref
jsonref_schema_dict = jsonref.loads(schema_string)
except Exception as e:
From 2c019148ad451752eff582c3206df75c83fe2a63 Mon Sep 17 00:00:00 2001
From: Alex Klavens <123000295+alexklavensnyt@users.noreply.github.com>
Date: Mon, 30 Oct 2023 04:43:52 -0400
Subject: [PATCH 020/792] =?UTF-8?q?fix(metadata-ingestion):=20adds=20defau?=
=?UTF-8?q?lt=20value=20to=20=5Fresolved=5Fdomain=5Furn=20i=E2=80=A6=20(#9?=
=?UTF-8?q?115)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Alex Klavens
---
.../src/datahub/api/entities/dataproduct/dataproduct.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
index 2d9b14ceb2d06..28e4a03b8f75f 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py
@@ -104,7 +104,7 @@ class DataProduct(ConfigModel):
id: str
domain: str
- _resolved_domain_urn: Optional[str]
+ _resolved_domain_urn: Optional[str] = None
assets: Optional[List[str]] = None
display_name: Optional[str] = None
owners: Optional[List[Union[str, Ownership]]] = None
From f5c8192cca6eacc7e21e62204883854f0f6bcbdb Mon Sep 17 00:00:00 2001
From: Aseem Bansal
Date: Mon, 30 Oct 2023 18:32:51 +0530
Subject: [PATCH 021/792] ci: tweak to only run relevant workflows (#9052)
---
.github/workflows/airflow-plugin.yml | 2 +-
.github/workflows/check-datahub-jars.yml | 12 ++++--------
.github/workflows/documentation.yml | 8 ++++++++
.github/workflows/metadata-ingestion.yml | 2 +-
.github/workflows/metadata-model.yml | 5 ++---
5 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml
index 54042d104d906..d0c0f52781b9a 100644
--- a/.github/workflows/airflow-plugin.yml
+++ b/.github/workflows/airflow-plugin.yml
@@ -12,7 +12,7 @@ on:
branches:
- "**"
paths:
- - ".github/**"
+ - ".github/workflows/airflow-plugin.yml"
- "metadata-ingestion-modules/airflow-plugin/**"
- "metadata-ingestion/**"
- "metadata-models/**"
diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml
index 9a17a70e7f8d4..41f9ea91a94e2 100644
--- a/.github/workflows/check-datahub-jars.yml
+++ b/.github/workflows/check-datahub-jars.yml
@@ -4,17 +4,13 @@ on:
push:
branches:
- master
- paths-ignore:
- - "docker/**"
- - "docs/**"
- - "**.md"
+ paths:
+ - "metadata-integration/**"
pull_request:
branches:
- "**"
- paths-ignore:
- - "docker/**"
- - "docs/**"
- - "**.md"
+ paths:
+ - "metadata-integration/**"
release:
types: [published]
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index ebe2990f3a3cd..c94282938120e 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -4,9 +4,17 @@ on:
pull_request:
branches:
- "**"
+ paths:
+ - "metadata-ingestion/**"
+ - "metadata-models/**"
+ - "docs-website/**"
push:
branches:
- master
+ paths:
+ - "metadata-ingestion/**"
+ - "metadata-models/**"
+ - "docs-website/**"
# release:
# types: [published, edited]
diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml
index 699ca330ce0ac..ec6bd4141cc6f 100644
--- a/.github/workflows/metadata-ingestion.yml
+++ b/.github/workflows/metadata-ingestion.yml
@@ -11,7 +11,7 @@ on:
branches:
- "**"
paths:
- - ".github/**"
+ - ".github/workflows/metadata-ingestion.yml"
- "metadata-ingestion/**"
- "metadata-models/**"
release:
diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml
index 9d54c88eee591..4bae5ccc9a266 100644
--- a/.github/workflows/metadata-model.yml
+++ b/.github/workflows/metadata-model.yml
@@ -3,9 +3,8 @@ on:
push:
branches:
- master
- paths-ignore:
- - "docs/**"
- - "**.md"
+ paths:
+ - "metadata-models/**"
release:
types: [published]
From 9c72bd9ed7f1c2b4228fc656c5f8b6f31bf0d431 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Mon, 30 Oct 2023 14:12:07 -0400
Subject: [PATCH 022/792] fix(test): Fix for flaky download_lineage_results
cypress test (#9132)
---
.../cypress/cypress/e2e/lineage/download_lineage_results.js | 3 +++
.../tests/cypress/cypress/e2e/mutations/dataset_ownership.js | 4 ++++
2 files changed, 7 insertions(+)
diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
index 315aa7b22b9da..dc6efc9f7df66 100644
--- a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
+++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
@@ -27,6 +27,9 @@ const downloadCsvFile = (filename) => {
};
describe("download lineage results to .csv file", () => {
+ beforeEach(() => {
+ cy.on('uncaught:exception', (err, runnable) => { return false; });
+ });
it("download and verify lineage results for 1st, 2nd and 3+ degree of dependencies", () => {
cy.loginWithCredentials();
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js
index 99ad9a68d35e1..465d7998b9f9a 100644
--- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js
@@ -29,6 +29,10 @@ const addOwner = (owner, type, elementId) => {
}
describe("add, remove ownership for dataset", () => {
+ beforeEach(() => {
+ cy.on('uncaught:exception', (err, runnable) => { return false; });
+ });
+
it("create test user and test group, add user to a group", () => {
cy.loginWithCredentials();
cy.createUser(username, password, email);
From 300cea373d6a94f05cf3bd95ab69bc503a28538e Mon Sep 17 00:00:00 2001
From: Pedro Silva
Date: Mon, 30 Oct 2023 20:50:42 +0000
Subject: [PATCH 023/792] docs: Update updating-datahub.md (#9131)
---
docs/how/updating-datahub.md | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 4d1535f28fa0a..28f11e4b6d707 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -4,10 +4,20 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
## Next
-- #9010 - In Redshift source's config `incremental_lineage` is set default to off.
+### Breaking Changes
+
+### Potential Downtime
+
+### Deprecations
+
+### Other Notable Changes
+
+## 0.12.0
### Breaking Changes
+- #9044 - GraphQL APIs for adding ownership now expect either an `ownershipTypeUrn` referencing a custom ownership type or a (deprecated) `type`. Previously, adding an ownership without a concrete type was allowed; this is no longer the case. For simplicity you can use the `type` parameter, which will get translated to a custom ownership type internally if one exists for the type being added.
+- #9010 - In Redshift source's config `incremental_lineage` is set default to off.
- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.
- #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted
by Looker and LookML source connectors.
From 58bcedcd6a091263c6dc3e1181c260233a80575d Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 14:18:48 -0700
Subject: [PATCH 024/792] fix(ingest/clickhouse): pin version to solve column
reflection regression (#9143)
---
metadata-ingestion/setup.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 7f7826abe2095..b1c5510efd923 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -173,7 +173,9 @@
clickhouse_common = {
# Clickhouse 0.2.0 adds support for SQLAlchemy 1.4.x
- "clickhouse-sqlalchemy>=0.2.0",
+ # Disallow 0.2.5 because of https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/272.
+ # Note that there's also a known issue around nested map types: https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/269.
+ "clickhouse-sqlalchemy>=0.2.0,<0.2.5",
}
redshift_common = {
From 51d6d1f4531dad133e06db75267fbea77e424d00 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 14:19:52 -0700
Subject: [PATCH 025/792] feat(ingest/looker): cleanup error handling (#9135)
---
.../src/datahub/ingestion/api/workunit.py | 6 +++++-
.../ingestion/source/looker/looker_lib_wrapper.py | 8 ++++++--
.../ingestion/source/looker/looker_source.py | 14 ++------------
.../ingestion/source/looker/lookml_source.py | 5 +----
4 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/api/workunit.py b/metadata-ingestion/src/datahub/ingestion/api/workunit.py
index 8eea3514a22af..b1c003ee27e12 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/workunit.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/workunit.py
@@ -22,7 +22,11 @@ class MetadataWorkUnit(WorkUnit):
metadata: Union[
MetadataChangeEvent, MetadataChangeProposal, MetadataChangeProposalWrapper
]
- # A workunit creator can determine if this workunit is allowed to fail
+
+ # A workunit creator can determine if this workunit is allowed to fail.
+ # TODO: This flag was initially added during the rollout of the subType aspect
+ # to improve backwards compatibility, but is not really needed anymore and so
+ # should be removed.
treat_errors_as_warnings: bool = False
# When this is set to false, this MWU will be ignored by automatic helpers
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
index cf132b7ef27f7..b00f74b71e792 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
@@ -123,8 +123,12 @@ def get_user(self, id_: str, user_fields: str) -> Optional[User]:
transport_options=self.transport_options,
)
except SDKError as e:
- logger.warning(f"Could not find user with id {id_}")
- logger.warning(f"Failure was {e}")
+ if "Looker Not Found (404)" in str(e):
+ # User not found
+ logger.info(f"Could not find user with id {id_}: 404 error")
+ else:
+ logger.warning(f"Could not find user with id {id_}")
+ logger.warning(f"Failure was {e}")
# User not found
return None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index a3df977582ca4..09683d790c14c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -926,14 +926,7 @@ def process_metrics_dimensions_and_fields_for_dashboard(
mcps = chart_mcps
mcps.append(dashboard_mcp)
- workunits = [
- MetadataWorkUnit(
- id=f"looker-{mcp.aspectName}-{mcp.entityUrn}",
- mcp=mcp,
- treat_errors_as_warnings=True,
- )
- for mcp in mcps
- ]
+ workunits = [mcp.as_workunit() for mcp in mcps]
return workunits
@@ -1320,10 +1313,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
id=f"looker-{event.proposedSnapshot.urn}", mce=event
)
elif isinstance(event, MetadataChangeProposalWrapper):
- # We want to treat subtype aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures
- yield event.as_workunit(
- treat_errors_as_warnings=event.aspectName in ["subTypes"]
- )
+ yield event.as_workunit()
else:
raise Exception(f"Unexpected type of event {event}")
self.reporter.report_stage_end("explore_metadata")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
index e69c3b6e601bd..e6b78cc7a7745 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
@@ -2171,10 +2171,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
for mcp in self._build_dataset_mcps(
maybe_looker_view
):
- # We want to treat mcp aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures
- yield mcp.as_workunit(
- treat_errors_as_warnings=True
- )
+ yield mcp.as_workunit()
else:
(
prev_model_name,
From 0bd2d9a36cdf18575ac4e54126db5be33ec59d8a Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 14:22:05 -0700
Subject: [PATCH 026/792] feat(ingest): add `entity_supports_aspect` helper
(#9120)
---
.../src/datahub/emitter/mcp_builder.py | 13 ++++++++++++-
metadata-ingestion/tests/unit/test_mcp_builder.py | 9 +++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py
index 65e0c0d6ba60d..d50feba8b119c 100644
--- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py
+++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py
@@ -1,9 +1,10 @@
-from typing import Dict, Iterable, List, Optional, TypeVar
+from typing import Dict, Iterable, List, Optional, Type, TypeVar
from pydantic.fields import Field
from pydantic.main import BaseModel
from datahub.emitter.mce_builder import (
+ Aspect,
datahub_guid,
make_container_urn,
make_data_platform_urn,
@@ -18,6 +19,7 @@
)
from datahub.metadata.com.linkedin.pegasus2avro.container import ContainerProperties
from datahub.metadata.schema_classes import (
+ KEY_ASPECTS,
ContainerClass,
DomainsClass,
EmbedClass,
@@ -306,3 +308,12 @@ def create_embed_mcp(urn: str, embed_url: str) -> MetadataChangeProposalWrapper:
entityUrn=urn,
aspect=EmbedClass(renderUrl=embed_url),
)
+
+
+def entity_supports_aspect(entity_type: str, aspect_type: Type[Aspect]) -> bool:
+ entity_key_aspect = KEY_ASPECTS[entity_type]
+ aspect_name = aspect_type.get_aspect_name()
+
+ supported_aspects = entity_key_aspect.ASPECT_INFO["entityAspects"]
+
+ return aspect_name in supported_aspects
diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/test_mcp_builder.py
index 561b782ef9e46..e304edb24789c 100644
--- a/metadata-ingestion/tests/unit/test_mcp_builder.py
+++ b/metadata-ingestion/tests/unit/test_mcp_builder.py
@@ -1,4 +1,5 @@
import datahub.emitter.mcp_builder as builder
+from datahub.metadata.schema_classes import StatusClass, TelemetryClientIdClass
def test_guid_generator():
@@ -83,3 +84,11 @@ def test_guid_generators():
guid = key.guid()
assert guid == guid_datahub
+
+
+def test_entity_supports_aspect():
+ assert builder.entity_supports_aspect("dataset", StatusClass)
+ assert not builder.entity_supports_aspect("telemetry", StatusClass)
+
+ assert not builder.entity_supports_aspect("dataset", TelemetryClientIdClass)
+ assert builder.entity_supports_aspect("telemetry", TelemetryClientIdClass)
From ce0f36b8bc74e3f0bab447408096347617804d92 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 14:23:19 -0700
Subject: [PATCH 027/792] feat(sqlparser): support more update syntaxes + fix
bug with subqueries (#9105)
---
.../src/datahub/utilities/sqlglot_lineage.py | 57 ++++++++++-
.../test_postgres_select_subquery.json | 64 ++++++++++++
.../test_snowflake_update_from_table.json | 1 +
.../test_snowflake_update_hardcoded.json | 4 +-
.../goldens/test_snowflake_update_self.json | 29 ++++++
.../unit/sql_parsing/test_sqlglot_lineage.py | 98 +++++++++++++++++++
6 files changed, 247 insertions(+), 6 deletions(-)
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index 1d74b20569814..388388f9f4b38 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -12,8 +12,8 @@
import sqlglot.errors
import sqlglot.lineage
import sqlglot.optimizer.annotate_types
+import sqlglot.optimizer.optimizer
import sqlglot.optimizer.qualify
-import sqlglot.optimizer.qualify_columns
from pydantic import BaseModel
from typing_extensions import TypedDict
@@ -48,6 +48,19 @@
SQL_PARSE_RESULT_CACHE_SIZE = 1000
+RULES_BEFORE_TYPE_ANNOTATION: tuple = tuple(
+ filter(
+ # Skip pushdown_predicates because it sometimes throws exceptions, and we
+ # don't actually need it for anything.
+ lambda func: func.__name__ not in {"pushdown_predicates"},
+ itertools.takewhile(
+ lambda func: func != sqlglot.optimizer.annotate_types.annotate_types,
+ sqlglot.optimizer.optimizer.RULES,
+ ),
+ )
+)
+
+
class GraphQLSchemaField(TypedDict):
fieldPath: str
nativeDataType: str
@@ -289,6 +302,10 @@ def _table_level_lineage(
)
# TODO: If a CTAS has "LIMIT 0", it's not really lineage, just copying the schema.
+ # Update statements implicitly read from the table being updated, so add those back in.
+ if isinstance(statement, sqlglot.exp.Update):
+ tables = tables | modified
+
return tables, modified
@@ -568,17 +585,20 @@ def _schema_aware_fuzzy_column_resolve(
# - the select instead of the full outer statement
# - schema info
# - column qualification enabled
+ # - running the full pre-type annotation optimizer
# logger.debug("Schema: %s", sqlglot_db_schema.mapping)
- statement = sqlglot.optimizer.qualify.qualify(
+ statement = sqlglot.optimizer.optimizer.optimize(
statement,
dialect=dialect,
schema=sqlglot_db_schema,
+ qualify_columns=True,
validate_qualify_columns=False,
identify=True,
# sqlglot calls the db -> schema -> table hierarchy "catalog", "db", "table".
catalog=default_db,
db=default_schema,
+ rules=RULES_BEFORE_TYPE_ANNOTATION,
)
except (sqlglot.errors.OptimizeError, ValueError) as e:
raise SqlUnderstandingError(
@@ -748,6 +768,7 @@ def _extract_select_from_create(
_UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT: Set[str] = set(
sqlglot.exp.Update.arg_types.keys()
) - set(sqlglot.exp.Select.arg_types.keys())
+_UPDATE_FROM_TABLE_ARGS_TO_MOVE = {"joins", "laterals", "pivot"}
def _extract_select_from_update(
@@ -774,17 +795,43 @@ def _extract_select_from_update(
# they'll get caught later.
new_expressions.append(expr)
- return sqlglot.exp.Select(
+ # Special translation for the `from` clause.
+ extra_args = {}
+ original_from = statement.args.get("from")
+ if original_from and isinstance(original_from.this, sqlglot.exp.Table):
+ # Move joins, laterals, and pivots from the Update->From->Table->field
+ # to the top-level Select->field.
+
+ for k in _UPDATE_FROM_TABLE_ARGS_TO_MOVE:
+ if k in original_from.this.args:
+ # Mutate the from table clause in-place.
+ extra_args[k] = original_from.this.args.pop(k)
+
+ select_statement = sqlglot.exp.Select(
**{
**{
k: v
for k, v in statement.args.items()
if k not in _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT
},
+ **extra_args,
"expressions": new_expressions,
}
)
+ # Update statements always implicitly have the updated table in context.
+ # TODO: Retain table name alias.
+ if select_statement.args.get("from"):
+ # select_statement = sqlglot.parse_one(select_statement.sql(dialect=dialect))
+
+ select_statement = select_statement.join(
+ statement.this, append=True, join_kind="cross"
+ )
+ else:
+ select_statement = select_statement.from_(statement.this)
+
+ return select_statement
+
def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool:
return isinstance(statement, sqlglot.exp.Create) and isinstance(
@@ -955,7 +1002,7 @@ def _sqlglot_lineage_inner(
# Fetch schema info for the relevant tables.
table_name_urn_mapping: Dict[_TableName, str] = {}
table_name_schema_mapping: Dict[_TableName, SchemaInfo] = {}
- for table in itertools.chain(tables, modified):
+ for table in tables | modified:
# For select statements, qualification will be a no-op. For other statements, this
# is where the qualification actually happens.
qualified_table = table.qualified(
@@ -971,7 +1018,7 @@ def _sqlglot_lineage_inner(
# Also include the original, non-qualified table name in the urn mapping.
table_name_urn_mapping[table] = urn
- total_tables_discovered = len(tables) + len(modified)
+ total_tables_discovered = len(tables | modified)
total_schemas_resolved = len(table_name_schema_mapping)
debug_info = SqlParsingDebugInfo(
confidence=0.9 if total_tables_discovered == total_schemas_resolved
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json
new file mode 100644
index 0000000000000..0c40ce120c934
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json
@@ -0,0 +1,64 @@
+{
+ "query_type": "SELECT",
+ "in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)"
+ ],
+ "out_tables": [],
+ "column_lineage": [
+ {
+ "downstream": {
+ "table": null,
+ "column": "a",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "INT"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)",
+ "column": "a"
+ }
+ ]
+ },
+ {
+ "downstream": {
+ "table": null,
+ "column": "b",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "INT"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)",
+ "column": "b"
+ }
+ ]
+ },
+ {
+ "downstream": {
+ "table": null,
+ "column": "c",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.ArrayType": {}
+ }
+ },
+ "native_column_type": "INT[]"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)",
+ "column": "c"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json
index e2baa34e7fe28..d51001f969799 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json
@@ -1,6 +1,7 @@
{
"query_type": "UPDATE",
"in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)"
],
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json
index b41ed61b37cdb..f421b28530c64 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json
@@ -1,6 +1,8 @@
{
"query_type": "UPDATE",
- "in_tables": [],
+ "in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)"
+ ],
"out_tables": [
"urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)"
],
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json
new file mode 100644
index 0000000000000..c8cc32164a3eb
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json
@@ -0,0 +1,29 @@
+{
+ "query_type": "UPDATE",
+ "in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)"
+ ],
+ "out_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)"
+ ],
+ "column_lineage": [
+ {
+ "downstream": {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)",
+ "column": "orderkey",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)",
+ "column": "orderkey"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index dfc5b486abd35..5559ebe1756a6 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -768,3 +768,101 @@ def test_snowflake_update_from_table():
},
expected_file=RESOURCE_DIR / "test_snowflake_update_from_table.json",
)
+
+
+def test_snowflake_update_self():
+ assert_sql_result(
+ """
+UPDATE snowflake_sample_data.tpch_sf1.orders
+SET orderkey = orderkey + 1
+""",
+ dialect="snowflake",
+ schemas={
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": {
+ "orderkey": "NUMBER(38,0)",
+ "totalprice": "NUMBER(12,2)",
+ },
+ },
+ expected_file=RESOURCE_DIR / "test_snowflake_update_self.json",
+ )
+
+
+def test_postgres_select_subquery():
+ assert_sql_result(
+ """
+SELECT
+ a,
+ b,
+ (SELECT c FROM table2 WHERE table2.id = table1.id) as c
+FROM table1
+""",
+ dialect="postgres",
+ default_db="my_db",
+ default_schema="my_schema",
+ schemas={
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)": {
+ "id": "INTEGER",
+ "a": "INTEGER",
+ "b": "INTEGER",
+ },
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)": {
+ "id": "INTEGER",
+ "c": "INTEGER",
+ },
+ },
+ expected_file=RESOURCE_DIR / "test_postgres_select_subquery.json",
+ )
+
+
+@pytest.mark.skip(reason="We can't parse column-list syntax with sub-selects yet")
+def test_postgres_update_subselect():
+ assert_sql_result(
+ """
+UPDATE accounts SET sales_person_name =
+ (SELECT name FROM employees
+ WHERE employees.id = accounts.sales_person_id)
+""",
+ dialect="postgres",
+ default_db="my_db",
+ default_schema="my_schema",
+ schemas={
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.accounts,PROD)": {
+ "id": "INTEGER",
+ "sales_person_id": "INTEGER",
+ "sales_person_name": "VARCHAR(16777216)",
+ },
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.employees,PROD)": {
+ "id": "INTEGER",
+ "name": "VARCHAR(16777216)",
+ },
+ },
+ expected_file=RESOURCE_DIR / "test_postgres_update_subselect.json",
+ )
+
+
+@pytest.mark.skip(reason="We can't parse column-list syntax with sub-selects yet")
+def test_postgres_complex_update():
+ # Example query from the postgres docs:
+ # https://www.postgresql.org/docs/current/sql-update.html
+ assert_sql_result(
+ """
+UPDATE accounts SET (contact_first_name, contact_last_name) =
+ (SELECT first_name, last_name FROM employees
+ WHERE employees.id = accounts.sales_person);
+""",
+ dialect="postgres",
+ schemas={
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.accounts,PROD)": {
+ "id": "INTEGER",
+ "contact_first_name": "VARCHAR(16777216)",
+ "contact_last_name": "VARCHAR(16777216)",
+ "sales_person": "INTEGER",
+ },
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.employees,PROD)": {
+ "id": "INTEGER",
+ "first_name": "VARCHAR(16777216)",
+ "last_name": "VARCHAR(16777216)",
+ },
+ },
+ expected_file=RESOURCE_DIR / "test_postgres_complex_update.json",
+ )
From 94d438d44f2d18def4a422cd60150d2c9a78be49 Mon Sep 17 00:00:00 2001
From: sachinsaju <33017477+sachinsaju@users.noreply.github.com>
Date: Tue, 31 Oct 2023 08:54:56 +0530
Subject: [PATCH 028/792] docs: correct broken doc links (#9137)
Co-authored-by: Hyejin Yoon <0327jane@gmail.com>
---
docs/deploy/aws.md | 2 +-
docs/what-is-datahub/datahub-concepts.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md
index e0f57b4a0b0cb..6598b93c25e9a 100644
--- a/docs/deploy/aws.md
+++ b/docs/deploy/aws.md
@@ -15,7 +15,7 @@ This guide requires the following tools:
- [kubectl](https://kubernetes.io/docs/tasks/tools/) to manage kubernetes resources
- [helm](https://helm.sh/docs/intro/install/) to deploy the resources based on helm charts. Note, we only support Helm
3.
-- [eksctl](https://eksctl.io/introduction/#installation) to create and manage clusters on EKS
+- [eksctl](https://eksctl.io/installation/) to create and manage clusters on EKS
- [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html) to manage AWS resources
To use the above tools, you need to set up AWS credentials by following
diff --git a/docs/what-is-datahub/datahub-concepts.md b/docs/what-is-datahub/datahub-concepts.md
index 6328d97fa6a50..03b86fab0ede4 100644
--- a/docs/what-is-datahub/datahub-concepts.md
+++ b/docs/what-is-datahub/datahub-concepts.md
@@ -99,7 +99,7 @@ List of Data Platforms
- Tableau
- Vertica
-Reference : [data_platforms.json](https://github.com/acryldata/datahub-fork/blob/acryl-main/metadata-service/war/src/main/resources/boot/data_platforms.json)
+Reference : [data_platforms.json](https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json)
From ea1273281e3a65ab4d94d002ee19f91907a3eb84 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 20:57:59 -0700
Subject: [PATCH 029/792] feat(ingest): sql parser perf + asyncio fixes (#9119)
---
metadata-ingestion/setup.py | 2 +-
.../src/datahub/cli/docker_cli.py | 5 ++
.../src/datahub/upgrade/upgrade.py | 12 ++---
.../src/datahub/utilities/sqlglot_lineage.py | 5 +-
.../goldens/test_select_from_union.json | 2 +-
.../test_teradata_strange_operators.json | 46 +++++++++++++++++++
.../unit/sql_parsing/test_sqlglot_lineage.py | 14 ++++++
7 files changed, 73 insertions(+), 13 deletions(-)
create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index b1c5510efd923..151842bd84d0a 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -108,7 +108,7 @@
sqlglot_lib = {
# Using an Acryl fork of sqlglot.
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
- "acryl-sqlglot==18.5.2.dev45",
+ "acryl-sqlglot==18.17.1.dev16",
}
sql_common = (
diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py
index 4afccfe711e34..77e3285d359ef 100644
--- a/metadata-ingestion/src/datahub/cli/docker_cli.py
+++ b/metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -5,6 +5,7 @@
import os
import pathlib
import platform
+import signal
import subprocess
import sys
import tempfile
@@ -770,6 +771,10 @@ def quickstart( # noqa: C901
logger.debug("docker compose up still running, sending SIGKILL")
up_process.kill()
up_process.wait()
+ else:
+ # If the docker process got a keyboard interrupt, raise one here.
+ if up_process.returncode in {128 + signal.SIGINT, -signal.SIGINT}:
+ raise KeyboardInterrupt
# Check docker health every few seconds.
status = check_docker_quickstart()
diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py
index 30f19b8b84f35..acc7954ad25a6 100644
--- a/metadata-ingestion/src/datahub/upgrade/upgrade.py
+++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py
@@ -1,6 +1,5 @@
import asyncio
import contextlib
-import functools
import logging
import sys
from datetime import datetime, timedelta, timezone
@@ -374,17 +373,14 @@ def check_upgrade(func: Callable[..., T]) -> Callable[..., T]:
@wraps(func)
def async_wrapper(*args: Any, **kwargs: Any) -> Any:
async def run_inner_func():
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(
- None, functools.partial(func, *args, **kwargs)
- )
+ return func(*args, **kwargs)
async def run_func_check_upgrade():
version_stats_future = asyncio.ensure_future(retrieve_version_stats())
- the_one_future = asyncio.ensure_future(run_inner_func())
- ret = await the_one_future
+ main_func_future = asyncio.ensure_future(run_inner_func())
+ ret = await main_func_future
- # the one future has returned
+ # the main future has returned
# we check the other futures quickly
try:
version_stats = await asyncio.wait_for(version_stats_future, 0.5)
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index 388388f9f4b38..6413275ac63a6 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -106,6 +106,7 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType:
sqlglot.exp.Update: QueryType.UPDATE,
sqlglot.exp.Delete: QueryType.DELETE,
sqlglot.exp.Merge: QueryType.MERGE,
+ sqlglot.exp.Subqueryable: QueryType.SELECT, # unions, etc. are also selects
}
for cls, query_type in mapping.items():
@@ -820,10 +821,8 @@ def _extract_select_from_update(
)
# Update statements always implicitly have the updated table in context.
- # TODO: Retain table name alias.
+ # TODO: Retain table name alias, if one was present.
if select_statement.args.get("from"):
- # select_statement = sqlglot.parse_one(select_statement.sql(dialect=dialect))
-
select_statement = select_statement.join(
statement.this, append=True, join_kind="cross"
)
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
index 902aa010c8afc..5d1d421f49a2a 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
@@ -1,5 +1,5 @@
{
- "query_type": "UNKNOWN",
+ "query_type": "SELECT",
"in_tables": [
"urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf10.orders,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf100.orders,PROD)"
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json
new file mode 100644
index 0000000000000..4b21a2512ccd1
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json
@@ -0,0 +1,46 @@
+{
+ "query_type": "SELECT",
+ "in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)"
+ ],
+ "out_tables": [],
+ "column_lineage": [
+ {
+ "downstream": {
+ "table": null,
+ "column": "col1",
+ "column_type": null,
+ "native_column_type": null
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)",
+ "column": "col1"
+ },
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)",
+ "column": "col1"
+ }
+ ]
+ },
+ {
+ "downstream": {
+ "table": null,
+ "column": "col2",
+ "column_type": null,
+ "native_column_type": null
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)",
+ "column": "col2"
+ },
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)",
+ "column": "col2"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index 5559ebe1756a6..3b9fa0d55f18d 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -675,6 +675,20 @@ def test_teradata_default_normalization():
)
+def test_teradata_strange_operators():
+ assert_sql_result(
+ """
+select col1, col2 from dbc.table1
+where col1 eq 'value1'
+minus
+select col1, col2 from dbc.table2
+""",
+ dialect="teradata",
+ default_schema="dbc",
+ expected_file=RESOURCE_DIR / "test_teradata_strange_operators.json",
+ )
+
+
def test_snowflake_update_hardcoded():
assert_sql_result(
"""
From b565a657d2235b82e65dfbe0bfcc11c97c3d9b79 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 30 Oct 2023 23:35:12 -0700
Subject: [PATCH 030/792] feat(quickstart): fix broker
InconsistentClusterIdException issues (#9148)
---
docker/docker-compose-with-cassandra.yml | 6 +++++-
docker/docker-compose-without-neo4j.yml | 6 +++++-
docker/docker-compose.yml | 6 +++++-
docker/quickstart/docker-compose-m1.quickstart.yml | 4 +++-
.../docker-compose-without-neo4j-m1.quickstart.yml | 4 +++-
.../quickstart/docker-compose-without-neo4j.quickstart.yml | 4 +++-
docker/quickstart/docker-compose.quickstart.yml | 4 +++-
7 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml
index 9543e67da07f2..39f4341600572 100644
--- a/docker/docker-compose-with-cassandra.yml
+++ b/docker/docker-compose-with-cassandra.yml
@@ -200,7 +200,10 @@ services:
retries: 5
timeout: 5s
volumes:
- - zkdata:/var/lib/zookeeper
+ # See https://stackoverflow.com/a/61008432 for why we need two volumes.
+ # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
networks:
default:
name: datahub_network
@@ -210,3 +213,4 @@ volumes:
neo4jdata:
broker:
zkdata:
+ zklogs:
diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml
index 022362782f742..235e89e340551 100644
--- a/docker/docker-compose-without-neo4j.yml
+++ b/docker/docker-compose-without-neo4j.yml
@@ -174,7 +174,10 @@ services:
retries: 3
timeout: 5s
volumes:
- - zkdata:/var/lib/zookeeper
+ # See https://stackoverflow.com/a/61008432 for why we need two volumes.
+ # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
networks:
default:
name: datahub_network
@@ -182,3 +185,4 @@ volumes:
esdata:
broker:
zkdata:
+ zklogs:
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index a486689e050a2..46da8c6fdbd2a 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -195,7 +195,10 @@ services:
retries: 3
timeout: 5s
volumes:
- - zkdata:/var/lib/zookeeper
+ # See https://stackoverflow.com/a/61008432 for why we need two volumes.
+ # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
networks:
default:
name: datahub_network
@@ -204,3 +207,4 @@ volumes:
neo4jdata:
broker:
zkdata:
+ zklogs:
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index c5de687d335b9..3b6d02c83d0f0 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -300,7 +300,8 @@ services:
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- - zkdata:/var/lib/zookeeper
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
version: '3.9'
volumes:
broker: null
@@ -308,3 +309,4 @@ volumes:
mysqldata: null
neo4jdata: null
zkdata: null
+ zklogs: null
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index b6935f24c5ce2..e45bafc3da480 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -274,10 +274,12 @@ services:
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- - zkdata:/var/lib/zookeeper
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
version: '3.9'
volumes:
broker: null
esdata: null
mysqldata: null
zkdata: null
+ zklogs: null
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index 4ff8bbd70da85..020ef5e9a97b9 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -274,10 +274,12 @@ services:
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- - zkdata:/var/lib/zookeeper
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
version: '3.9'
volumes:
broker: null
esdata: null
mysqldata: null
zkdata: null
+ zklogs: null
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index f2950ebab2c9d..8adc2b9063b84 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -300,7 +300,8 @@ services:
ports:
- ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
volumes:
- - zkdata:/var/lib/zookeeper
+ - zkdata:/var/lib/zookeeper/data
+ - zklogs:/var/lib/zookeeper/log
version: '3.9'
volumes:
broker: null
@@ -308,3 +309,4 @@ volumes:
mysqldata: null
neo4jdata: null
zkdata: null
+ zklogs: null
From 2e8954f33a10f3e11af22fe6198fea43d65d580c Mon Sep 17 00:00:00 2001
From: Aseem Bansal
Date: Tue, 31 Oct 2023 22:25:48 +0530
Subject: [PATCH 031/792] fix(policies): remove non-existent policies, fix name
(#9150)
---
.../war/src/main/resources/boot/policies.json | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json
index 18cb48bfcf1f0..b7ffc11c08f05 100644
--- a/metadata-service/war/src/main/resources/boot/policies.json
+++ b/metadata-service/war/src/main/resources/boot/policies.json
@@ -56,7 +56,7 @@
"EDIT_ENTITY",
"VIEW_ENTITY_PAGE",
"EDIT_LINEAGE",
- "EDIT_ENTITY_ASSERTIONS_PRIVILEGE",
+ "EDIT_ENTITY_ASSERTIONS",
"SEARCH_PRIVILEGE",
"GET_COUNTS_PRIVILEGE",
"GET_TIMESERIES_ASPECT_PRIVILEGE",
@@ -251,11 +251,6 @@
"EDIT_GROUP_MEMBERS",
"EDIT_USER_PROFILE",
"EDIT_CONTACT_INFO",
- "MANAGE_ENTITY_TAGS_PRIVILEGE",
- "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_TAGS_PRIVILEGE",
- "EDIT_ENTITY_ASSERTIONS_PRIVILEGE",
"EDIT_LINEAGE",
"EDIT_ENTITY_QUERIES",
"SEARCH_PRIVILEGE",
@@ -336,11 +331,6 @@
"EDIT_GROUP_MEMBERS",
"EDIT_USER_PROFILE",
"EDIT_CONTACT_INFO",
- "MANAGE_ENTITY_TAGS_PRIVILEGE",
- "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_TAGS_PRIVILEGE",
- "EDIT_ENTITY_ASSERTIONS_PRIVILEGE",
"EDIT_LINEAGE",
"EDIT_ENTITY_QUERIES",
"SEARCH_PRIVILEGE",
@@ -441,11 +431,6 @@
"EDIT_GROUP_MEMBERS",
"EDIT_USER_PROFILE",
"EDIT_CONTACT_INFO",
- "MANAGE_ENTITY_TAGS_PRIVILEGE",
- "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE",
- "MANAGE_DATASET_COL_TAGS_PRIVILEGE",
- "EDIT_ENTITY_ASSERTIONS_PRIVILEGE",
"EDIT_LINEAGE",
"EDIT_ENTITY_QUERIES",
"GET_TIMELINE_PRIVILEGE",
From b8dcc86281d06fcde35773fd4ef4933f5b553fd7 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Tue, 31 Oct 2023 15:48:34 -0400
Subject: [PATCH 032/792] refactor(smoke): Fix for a test that passed on Oss
and failed on Saas (#9147)
---
.../cypress/cypress/e2e/lineage/download_lineage_results.js | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
index dc6efc9f7df66..ed4167b87c506 100644
--- a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
+++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js
@@ -37,7 +37,7 @@ describe("download lineage results to .csv file", () => {
cy.openEntityTab("Lineage");
// Verify 1st degree of dependencies
- cy.contains(/1 - 3 of 3/);
+ cy.contains(/1 - [3-4] of [3-4]/);
downloadCsvFile("first_degree_results.csv");
let first_degree_csv = cy.readFile('cypress/downloads/first_degree_results.csv');
first_degree.forEach(function (urn) {
@@ -52,7 +52,7 @@ describe("download lineage results to .csv file", () => {
// Verify 1st and 2nd degree of dependencies
cy.get('[data-testid="facet-degree-2"]').click().wait(5000);
- cy.contains(/1 - 7 of 7/);
+ cy.contains(/1 - [7-8] of [7-8]/);
downloadCsvFile("second_degree_results.csv");
let second_degree_csv = cy.readFile('cypress/downloads/second_degree_results.csv');
first_degree.forEach(function (urn) {
@@ -67,7 +67,7 @@ describe("download lineage results to .csv file", () => {
// Verify 1st 2nd and 3+ degree of dependencies(Verify multi page download)
cy.get('[data-testid="facet-degree-3+"]').click().wait(5000);
- cy.contains(/1 - 10 of 13/);
+ cy.contains(/1 - 10 of 1[3-4]/);
downloadCsvFile("third_plus_degree_results.csv");
let third_degree_csv = cy.readFile('cypress/downloads/third_plus_degree_results.csv');
first_degree.forEach(function (urn) {
From dae320c9bc28b80c6110395092d4223e9a37258b Mon Sep 17 00:00:00 2001
From: sachinsaju <33017477+sachinsaju@users.noreply.github.com>
Date: Wed, 1 Nov 2023 04:28:39 +0530
Subject: [PATCH 033/792] docs(teradata): teradata doc external link 404 fix
(#9152)
---
metadata-ingestion/docs/sources/teradata/teradata_pre.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md
index 7263a59f5ea3d..7b4da1255d575 100644
--- a/metadata-ingestion/docs/sources/teradata/teradata_pre.md
+++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md
@@ -25,4 +25,4 @@ will fit for your queries (the default query text size Teradata captures is max
REPLACE QUERY LOGGING LIMIT SQLTEXT=2000 ON ALL;
```
See more here about query logging:
- [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs]()
+ [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs](https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs)
From 7a31950f794b2b0527ad685cbd08e967b524bfec Mon Sep 17 00:00:00 2001
From: "Jia (Jason) Teoh"
Date: Tue, 31 Oct 2023 18:41:16 -0700
Subject: [PATCH 034/792] fix(datahub-client): Include relocation for snakeyaml
dependency. (#8911)
Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
---
metadata-integration/java/datahub-client/build.gradle | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle
index e6210f1f073f6..0bf6b18fa5073 100644
--- a/metadata-integration/java/datahub-client/build.gradle
+++ b/metadata-integration/java/datahub-client/build.gradle
@@ -97,6 +97,7 @@ shadowJar {
// we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first
relocate 'org.springframework', 'datahub.shaded.org.springframework'
relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson'
+ relocate 'org.yaml', 'io.acryl.shaded.org.yaml' // Required for shading snakeyaml
relocate 'net.jcip.annotations', 'datahub.shaded.annotations'
relocate 'javassist', 'datahub.shaded.javassist'
relocate 'edu.umd.cs.findbugs', 'datahub.shaded.findbugs'
@@ -242,4 +243,4 @@ checkstyleMain.exclude '**/generated/**'
clean {
project.delete("$projectDir/generated")
-}
\ No newline at end of file
+}
From 73514ad9c5643cc5fbbb1edb0991d4aea0812459 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Tue, 31 Oct 2023 21:28:38 -0700
Subject: [PATCH 035/792] fix(ingest): cleanup large images in CI (#9153)
---
.../tests/integration/sql_server/test_sql_server.py | 5 ++++-
metadata-ingestion/tests/integration/vertica/test_vertica.py | 5 ++++-
metadata-ingestion/tests/test_helpers/docker_helpers.py | 5 +++++
3 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
index 099690fed34c2..f439a322c2677 100644
--- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
+++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py
@@ -6,7 +6,7 @@
from tests.test_helpers import mce_helpers
from tests.test_helpers.click_helpers import run_datahub_cmd
-from tests.test_helpers.docker_helpers import wait_for_port
+from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port
@pytest.fixture(scope="module")
@@ -29,6 +29,9 @@ def mssql_runner(docker_compose_runner, pytestconfig):
assert ret.returncode == 0
yield docker_services
+ # The image is pretty large, so we remove it after the test.
+ cleanup_image("mcr.microsoft.com/mssql/server")
+
SOURCE_FILES_PATH = "./tests/integration/sql_server/source_files"
config_file = os.listdir(SOURCE_FILES_PATH)
diff --git a/metadata-ingestion/tests/integration/vertica/test_vertica.py b/metadata-ingestion/tests/integration/vertica/test_vertica.py
index 94ad33ba21ce4..d7b4c390f75d9 100644
--- a/metadata-ingestion/tests/integration/vertica/test_vertica.py
+++ b/metadata-ingestion/tests/integration/vertica/test_vertica.py
@@ -6,7 +6,7 @@
from tests.test_helpers import mce_helpers
from tests.test_helpers.click_helpers import run_datahub_cmd
-from tests.test_helpers.docker_helpers import wait_for_port
+from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port
FROZEN_TIME = "2020-04-14 07:00:00"
@@ -49,6 +49,9 @@ def vertica_runner(docker_compose_runner, test_resources_dir):
yield docker_services
+ # The image is pretty large, so we remove it after the test.
+ cleanup_image("vertica/vertica-ce")
+
@freeze_time(FROZEN_TIME)
@pytest.mark.integration
diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py
index 30157c3a78094..2eb61068196a2 100644
--- a/metadata-ingestion/tests/test_helpers/docker_helpers.py
+++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py
@@ -1,5 +1,6 @@
import contextlib
import logging
+import os
import subprocess
from typing import Callable, Optional, Union
@@ -78,6 +79,10 @@ def run(
def cleanup_image(image_name: str) -> None:
assert ":" not in image_name, "image_name should not contain a tag"
+ if not os.environ.get("CI"):
+ logger.debug("Not cleaning up images to speed up local development")
+ return
+
images_proc = subprocess.run(
f"docker image ls --filter 'reference={image_name}*' -q",
shell=True,
From d2314976033e42c13b7897f46ea0f227afb7c90b Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Tue, 31 Oct 2023 21:37:11 -0700
Subject: [PATCH 036/792] build: increase gradle retries (#9091)
---
gradle.properties | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/gradle.properties b/gradle.properties
index 2b211e725359a..1cd349344b432 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -6,9 +6,16 @@ org.gradle.caching=false
# Increase gradle JVM memory to 3GB to allow tests to run locally
org.gradle.jvmargs=-Xmx3000m
# Increase retries to 5 (from default of 3) and increase interval from 125ms to 1s.
+# Based on this thread https://github.com/gradle/gradle/issues/4629, it's unclear
+# if we should be using systemProp or not. We're using both for now.
org.gradle.internal.repository.max.retries=5
org.gradle.internal.repository.max.tentatives=5
org.gradle.internal.repository.initial.backoff=1000
+systemProp.org.gradle.internal.http.connectionTimeout=120000
+systemProp.org.gradle.internal.http.socketTimeout=120000
+systemProp.org.gradle.internal.repository.max.retries=5
+systemProp.org.gradle.internal.repository.max.tentatives=5
+systemProp.org.gradle.internal.repository.initial.backoff=1000
# Needed to publish to Nexus from a sub-module
gnsp.disableApplyOnlyOnRootProjectEnforcement=true
From 55f14530a397f75a9201db11c13bd7bbbb25162c Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 1 Nov 2023 00:12:52 -0700
Subject: [PATCH 037/792] feat(ingest): bump sqlglot parser (#9155)
---
metadata-ingestion/setup.py | 2 +-
.../unit/sql_parsing/test_sqlglot_lineage.py | 19 ++++++++++++++++++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 151842bd84d0a..afce8dcee840b 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -108,7 +108,7 @@
sqlglot_lib = {
# Using an Acryl fork of sqlglot.
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
- "acryl-sqlglot==18.17.1.dev16",
+ "acryl-sqlglot==19.0.2.dev10",
}
sql_common = (
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index 3b9fa0d55f18d..c420f2b8438ce 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -676,9 +676,13 @@ def test_teradata_default_normalization():
def test_teradata_strange_operators():
+ # This is a test for the following operators:
+ # - `SEL` (select)
+ # - `EQ` (equals)
+ # - `MINUS` (except)
assert_sql_result(
"""
-select col1, col2 from dbc.table1
+sel col1, col2 from dbc.table1
where col1 eq 'value1'
minus
select col1, col2 from dbc.table2
@@ -689,6 +693,19 @@ def test_teradata_strange_operators():
)
+@pytest.mark.skip("sqlglot doesn't support this cast syntax yet")
+def test_teradata_cast_syntax():
+ assert_sql_result(
+ """
+SELECT my_table.date_col MONTH(4) AS month_col
+FROM my_table
+""",
+ dialect="teradata",
+ default_schema="dbc",
+ expected_file=RESOURCE_DIR / "test_teradata_cast_syntax.json",
+ )
+
+
def test_snowflake_update_hardcoded():
assert_sql_result(
"""
From 876de214c9a11f8928d8eafe5c7f658d5b9dc61f Mon Sep 17 00:00:00 2001
From: Tony Ouyang
Date: Wed, 1 Nov 2023 00:13:17 -0700
Subject: [PATCH 038/792] feat(ingest/mongodb): support stateful ingestion
(#9118)
---
.../src/datahub/ingestion/source/mongodb.py | 74 +-
.../mongodb/mongodb_mces_golden.json | 8320 +++++++++--------
2 files changed, 4270 insertions(+), 4124 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
index 890c5c64bd5e6..ce2b9ce2981e0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py
@@ -15,7 +15,12 @@
EnvConfigMixin,
PlatformInstanceConfigMixin,
)
-from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
+from datahub.emitter.mce_builder import (
+ make_data_platform_urn,
+ make_dataplatform_instance_urn,
+ make_dataset_urn_with_platform_instance,
+)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SourceCapability,
@@ -25,14 +30,21 @@
platform_name,
support_status,
)
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.schema_inference.object import (
SchemaDescription,
construct_schema,
)
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
-from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+ StaleEntityRemovalHandler,
+ StaleEntityRemovalSourceReport,
+ StatefulIngestionConfigBase,
+ StatefulStaleMetadataRemovalConfig,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+ StatefulIngestionSourceBase,
+)
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
ArrayTypeClass,
BooleanTypeClass,
@@ -48,7 +60,10 @@
TimeTypeClass,
UnionTypeClass,
)
-from datahub.metadata.schema_classes import DatasetPropertiesClass
+from datahub.metadata.schema_classes import (
+ DataPlatformInstanceClass,
+ DatasetPropertiesClass,
+)
logger = logging.getLogger(__name__)
@@ -59,7 +74,9 @@
DENY_DATABASE_LIST = set(["admin", "config", "local"])
-class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
+class MongoDBConfig(
+ PlatformInstanceConfigMixin, EnvConfigMixin, StatefulIngestionConfigBase
+):
# See the MongoDB authentication docs for details and examples.
# https://pymongo.readthedocs.io/en/stable/examples/authentication.html
connect_uri: str = Field(
@@ -99,6 +116,8 @@ class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin):
default=AllowDenyPattern.allow_all(),
description="regex patterns for collections to filter in ingestion.",
)
+ # Custom Stateful Ingestion settings
+ stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
@validator("maxDocumentSize")
def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value):
@@ -108,7 +127,7 @@ def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value):
@dataclass
-class MongoDBSourceReport(SourceReport):
+class MongoDBSourceReport(StaleEntityRemovalSourceReport):
filtered: List[str] = field(default_factory=list)
def report_dropped(self, name: str) -> None:
@@ -129,6 +148,7 @@ def report_dropped(self, name: str) -> None:
bson.timestamp.Timestamp: "timestamp",
bson.dbref.DBRef: "dbref",
bson.objectid.ObjectId: "oid",
+ bson.Decimal128: "numberDecimal",
"mixed": "mixed",
}
@@ -145,6 +165,7 @@ def report_dropped(self, name: str) -> None:
bson.timestamp.Timestamp: TimeTypeClass,
bson.dbref.DBRef: BytesTypeClass,
bson.objectid.ObjectId: BytesTypeClass,
+ bson.Decimal128: NumberTypeClass,
dict: RecordTypeClass,
"mixed": UnionTypeClass,
}
@@ -206,7 +227,7 @@ def construct_schema_pymongo(
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
@dataclass
-class MongoDBSource(Source):
+class MongoDBSource(StatefulIngestionSourceBase):
"""
This plugin extracts the following:
@@ -227,7 +248,7 @@ class MongoDBSource(Source):
mongo_client: MongoClient
def __init__(self, ctx: PipelineContext, config: MongoDBConfig):
- super().__init__(ctx)
+ super().__init__(config, ctx)
self.config = config
self.report = MongoDBSourceReport()
@@ -254,6 +275,14 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> "MongoDBSource":
config = MongoDBConfig.parse_obj(config_dict)
return cls(ctx, config)
+ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+ return [
+ *super().get_workunit_processors(),
+ StaleEntityRemovalHandler.create(
+ self, self.config, self.ctx
+ ).workunit_processor,
+ ]
+
def get_pymongo_type_string(
self, field_type: Union[Type, str], collection_name: str
) -> str:
@@ -332,16 +361,18 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
platform_instance=self.config.platform_instance,
)
- dataset_snapshot = DatasetSnapshot(
- urn=dataset_urn,
- aspects=[],
- )
+ if self.config.platform_instance:
+ data_platform_instance = DataPlatformInstanceClass(
+ platform=make_data_platform_urn(platform),
+ instance=make_dataplatform_instance_urn(
+ platform, self.config.platform_instance
+ ),
+ )
dataset_properties = DatasetPropertiesClass(
tags=[],
customProperties={},
)
- dataset_snapshot.aspects.append(dataset_properties)
if self.config.enableSchemaInference:
assert self.config.maxDocumentSize is not None
@@ -412,13 +443,20 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
fields=canonical_schema,
)
- dataset_snapshot.aspects.append(schema_metadata)
-
# TODO: use list_indexes() or index_information() to get index information
# See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.list_indexes.
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
- yield MetadataWorkUnit(id=dataset_name, mce=mce)
+ yield from [
+ mcp.as_workunit()
+ for mcp in MetadataChangeProposalWrapper.construct_many(
+ entityUrn=dataset_urn,
+ aspects=[
+ schema_metadata,
+ dataset_properties,
+ data_platform_instance,
+ ],
+ )
+ ]
def is_server_version_gte_4_4(self) -> bool:
try:
diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
index e16101b137ac9..ec3fd80e6a6ea 100644
--- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
+++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json
@@ -1,4136 +1,4240 @@
[
{
- "proposedSnapshot": {
- "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
- "aspects": [
- {
- "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
- "customProperties": {},
- "tags": []
- }
- },
- {
- "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
- "schemaName": "emptyCollection",
- "platform": "urn:li:dataPlatform:mongodb",
- "version": 0,
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "hash": "",
- "platformSchema": {
- "com.linkedin.pegasus2avro.schema.Schemaless": {}
- },
- "fields": []
- }
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "emptyCollection",
+ "platform": "urn:li:dataPlatform:mongodb",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.Schemaless": {}
+ },
+ "fields": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:mongodb",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "firstCollection",
+ "platform": "urn:li:dataPlatform:mongodb",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.Schemaless": {}
+ },
+ "fields": [
+ {
+ "fieldPath": "_id",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.BytesType": {}
+ }
+ },
+ "nativeDataType": "oid",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "age",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "canSwim",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "emptyObject",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "OBJECT",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteColor",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "OBJECT",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.calories",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "integer",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.emptyObject",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.RecordType": {}
+ }
+ },
+ "nativeDataType": "OBJECT",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.ingredients",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.ArrayType": {}
+ }
+ },
+ "nativeDataType": "ARRAY",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.ingredients.color",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.ingredients.from",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.ingredients.name",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.name",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "favoriteFood.servings",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "legs",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "integer",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "mixedType",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.UnionType": {}
+ }
+ },
+ "nativeDataType": "mixed",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "name",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "seen",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "servings",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "integer",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "sometimesNull",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "tags",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.ArrayType": {}
+ }
+ },
+ "nativeDataType": "ARRAY",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "type",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:mongodb",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "largeCollection",
+ "platform": "urn:li:dataPlatform:mongodb",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.Schemaless": {}
+ },
+ "fields": [
+ {
+ "fieldPath": "_id",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.BytesType": {}
+ }
+ },
+ "nativeDataType": "oid",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_200",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_201",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_202",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_203",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_204",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_205",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_206",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_207",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_208",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_209",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_210",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_211",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_212",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_213",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_214",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_215",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_216",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_217",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_218",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_219",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_220",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_221",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_222",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_223",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_224",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_225",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_226",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_227",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_228",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_229",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_230",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_231",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_232",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_233",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_234",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_235",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_236",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_237",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_238",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_239",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_240",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_241",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_242",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_243",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_244",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_245",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_246",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_247",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_248",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_249",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_250",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_251",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_252",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_253",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_254",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_255",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_256",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_257",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_258",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_259",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_260",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_261",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_262",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_263",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_264",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_265",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_266",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_267",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_268",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_269",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_270",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_271",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_272",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_273",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_274",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_275",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_276",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_277",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_278",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_279",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_280",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_281",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_282",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_283",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_284",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_285",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_286",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_287",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_288",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_289",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_290",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_291",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_292",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_293",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_294",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_295",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_296",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_297",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_298",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_299",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_300",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_301",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_302",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_303",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_304",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_305",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_306",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_307",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_308",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_309",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_310",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_311",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_312",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_313",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_314",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_315",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_316",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_317",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_318",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_319",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_320",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_321",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_322",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_323",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_324",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_325",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_326",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_327",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_328",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_329",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_330",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_331",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_332",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_333",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_334",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_335",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_336",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_337",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_338",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_339",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_340",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_341",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_342",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_343",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_344",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_345",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_346",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_347",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_348",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_349",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_350",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_351",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_352",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_353",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_354",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_355",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_356",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_357",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_358",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_359",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_360",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_361",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_362",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_363",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_364",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_365",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_366",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_367",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_368",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_369",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_370",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_371",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_372",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_374",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_375",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_376",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_377",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_378",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_379",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_380",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_381",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_382",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_383",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_384",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_385",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_386",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_387",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_388",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_389",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_390",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_391",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_392",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_393",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_394",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_395",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_396",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_397",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_398",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_399",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_400",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_401",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_402",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_403",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_404",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_405",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_406",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_407",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_408",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_409",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_410",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_411",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_412",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_413",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_414",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_415",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_416",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_417",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_418",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_419",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_420",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_421",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_422",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_423",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_424",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_425",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_426",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_427",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_428",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_429",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_430",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_431",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_432",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_433",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_434",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_435",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_436",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_437",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_438",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_439",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_440",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_441",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_442",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_443",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_444",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_445",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_446",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_447",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_448",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_449",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_450",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_451",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_452",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_453",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_454",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_455",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_456",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_457",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_458",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_459",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_460",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_461",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_462",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_463",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_464",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_465",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_466",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_467",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_468",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_469",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_470",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_471",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_472",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_473",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_474",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_475",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_476",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_477",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_478",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_479",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_480",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_481",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_482",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_483",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_484",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_485",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_486",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_487",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_488",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_489",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_490",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_491",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_492",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_493",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_494",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_495",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_496",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_497",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_498",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "field_499",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
}
]
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "proposedSnapshot": {
- "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)",
- "aspects": [
- {
- "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
- "customProperties": {},
- "tags": []
- }
- },
- {
- "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
- "schemaName": "firstCollection",
- "platform": "urn:li:dataPlatform:mongodb",
- "version": 0,
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "hash": "",
- "platformSchema": {
- "com.linkedin.pegasus2avro.schema.Schemaless": {}
- },
- "fields": [
- {
- "fieldPath": "_id",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.BytesType": {}
- }
- },
- "nativeDataType": "oid",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "age",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "float",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "canSwim",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.BooleanType": {}
- }
- },
- "nativeDataType": "boolean",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "emptyObject",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.RecordType": {}
- }
- },
- "nativeDataType": "OBJECT",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteColor",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.RecordType": {}
- }
- },
- "nativeDataType": "OBJECT",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.calories",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "integer",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.emptyObject",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.RecordType": {}
- }
- },
- "nativeDataType": "OBJECT",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.ingredients",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.ArrayType": {}
- }
- },
- "nativeDataType": "ARRAY",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.ingredients.color",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.ingredients.from",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.ingredients.name",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.name",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "favoriteFood.servings",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "float",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "legs",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "integer",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "mixedType",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.UnionType": {}
- }
- },
- "nativeDataType": "mixed",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "name",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "seen",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "float",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "servings",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "integer",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "sometimesNull",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "tags",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.ArrayType": {}
- }
- },
- "nativeDataType": "ARRAY",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "type",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- }
- ]
- }
- }
- ]
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "schema.downsampled": "True",
+ "schema.totalFields": "501"
+ },
+ "tags": []
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "proposedSnapshot": {
- "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
- "aspects": [
- {
- "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
- "customProperties": {
- "schema.downsampled": "True",
- "schema.totalFields": "501"
- },
- "tags": []
- }
- },
- {
- "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
- "schemaName": "largeCollection",
- "platform": "urn:li:dataPlatform:mongodb",
- "version": 0,
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "hash": "",
- "platformSchema": {
- "com.linkedin.pegasus2avro.schema.Schemaless": {}
- },
- "fields": [
- {
- "fieldPath": "_id",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.BytesType": {}
- }
- },
- "nativeDataType": "oid",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_200",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_201",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_202",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_203",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_204",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_205",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_206",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_207",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_208",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_209",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_210",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_211",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_212",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_213",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_214",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_215",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_216",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_217",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_218",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_219",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_220",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_221",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_222",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_223",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_224",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_225",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_226",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_227",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_228",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_229",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_230",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_231",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_232",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_233",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_234",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_235",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_236",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_237",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_238",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_239",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_240",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_241",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_242",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_243",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_244",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_245",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_246",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_247",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_248",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_249",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_250",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_251",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_252",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_253",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_254",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_255",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_256",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_257",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_258",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_259",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_260",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_261",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_262",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_263",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_264",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_265",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_266",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_267",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_268",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_269",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_270",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_271",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_272",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_273",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_274",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_275",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_276",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_277",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_278",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_279",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_280",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_281",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_282",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_283",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_284",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_285",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_286",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_287",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_288",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_289",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_290",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_291",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_292",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_293",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_294",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_295",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_296",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_297",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_298",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_299",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_300",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_301",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_302",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_303",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_304",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_305",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_306",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_307",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_308",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_309",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_310",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_311",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_312",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_313",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_314",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_315",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_316",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_317",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_318",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_319",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_320",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_321",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_322",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_323",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_324",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_325",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_326",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_327",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_328",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_329",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_330",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_331",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_332",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_333",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_334",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_335",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_336",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_337",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_338",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_339",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_340",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_341",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_342",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_343",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_344",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_345",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_346",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_347",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_348",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_349",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_350",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_351",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_352",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_353",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_354",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_355",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_356",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_357",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_358",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_359",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_360",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_361",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_362",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_363",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_364",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_365",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_366",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_367",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_368",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_369",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_370",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_371",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_372",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_374",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_375",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_376",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_377",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_378",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_379",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_380",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_381",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_382",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_383",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_384",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_385",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_386",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_387",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_388",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_389",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_390",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_391",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_392",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_393",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_394",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_395",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_396",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_397",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_398",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_399",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_400",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_401",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_402",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_403",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_404",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_405",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_406",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_407",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_408",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_409",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_410",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_411",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_412",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_413",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_414",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_415",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_416",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_417",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_418",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_419",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_420",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_421",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_422",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_423",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_424",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_425",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_426",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_427",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_428",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_429",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_430",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_431",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_432",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_433",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_434",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_435",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_436",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_437",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_438",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_439",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_440",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_441",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_442",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_443",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_444",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_445",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_446",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_447",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_448",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_449",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_450",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_451",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_452",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_453",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_454",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_455",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_456",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_457",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_458",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_459",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_460",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_461",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_462",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_463",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_464",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_465",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_466",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_467",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_468",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_469",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_470",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_471",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_472",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_473",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_474",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_475",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_476",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_477",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_478",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_479",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_480",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_481",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_482",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_483",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_484",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_485",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_486",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_487",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_488",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_489",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_490",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_491",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_492",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_493",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_494",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_495",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_496",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_497",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_498",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "field_499",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- }
- ]
- }
- }
- ]
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:mongodb",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)"
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
- "proposedSnapshot": {
- "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
- "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
- "aspects": [
- {
- "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
- "customProperties": {},
- "tags": []
- }
- },
- {
- "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
- "schemaName": "secondCollection",
- "platform": "urn:li:dataPlatform:mongodb",
- "version": 0,
- "created": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- },
- "hash": "",
- "platformSchema": {
- "com.linkedin.pegasus2avro.schema.Schemaless": {}
- },
- "fields": [
- {
- "fieldPath": "_id",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.BytesType": {}
- }
- },
- "nativeDataType": "oid",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "mixedType",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.UnionType": {}
- }
- },
- "nativeDataType": "mixed",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "mixedType.fieldA",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "mixedType.fieldTwo",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "integer",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "name",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.StringType": {}
- }
- },
- "nativeDataType": "string",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "nullableMixedType",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.UnionType": {}
- }
- },
- "nativeDataType": "mixed",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "rating",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.NumberType": {}
- }
- },
- "nativeDataType": "float",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "tasty",
- "nullable": false,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.BooleanType": {}
- }
- },
- "nativeDataType": "boolean",
- "recursive": false,
- "isPartOfKey": false
- },
- {
- "fieldPath": "varieties",
- "nullable": true,
- "type": {
- "type": {
- "com.linkedin.pegasus2avro.schema.ArrayType": {}
- }
- },
- "nativeDataType": "ARRAY",
- "recursive": false,
- "isPartOfKey": false
- }
- ]
- }
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "schemaMetadata",
+ "aspect": {
+ "json": {
+ "schemaName": "secondCollection",
+ "platform": "urn:li:dataPlatform:mongodb",
+ "version": 0,
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "hash": "",
+ "platformSchema": {
+ "com.linkedin.schema.Schemaless": {}
+ },
+ "fields": [
+ {
+ "fieldPath": "_id",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.BytesType": {}
+ }
+ },
+ "nativeDataType": "oid",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "mixedType",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.UnionType": {}
+ }
+ },
+ "nativeDataType": "mixed",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "mixedType.fieldA",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "mixedType.fieldTwo",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "integer",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "name",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.StringType": {}
+ }
+ },
+ "nativeDataType": "string",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "nullableMixedType",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.UnionType": {}
+ }
+ },
+ "nativeDataType": "mixed",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "rating",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.NumberType": {}
+ }
+ },
+ "nativeDataType": "float",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "tasty",
+ "nullable": false,
+ "type": {
+ "type": {
+ "com.linkedin.schema.BooleanType": {}
+ }
+ },
+ "nativeDataType": "boolean",
+ "recursive": false,
+ "isPartOfKey": false
+ },
+ {
+ "fieldPath": "varieties",
+ "nullable": true,
+ "type": {
+ "type": {
+ "com.linkedin.schema.ArrayType": {}
+ }
+ },
+ "nativeDataType": "ARRAY",
+ "recursive": false,
+ "isPartOfKey": false
}
]
}
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "datasetProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:mongodb",
+ "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1615443388097,
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4145,7 +4249,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4160,7 +4265,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4175,7 +4281,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -4190,7 +4297,8 @@
},
"systemMetadata": {
"lastObserved": 1615443388097,
- "runId": "mongodb-test"
+ "runId": "mongodb-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
From f2eb0cf3073967d505004e9f2df58d5475cadec1 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Wed, 1 Nov 2023 15:41:02 -0400
Subject: [PATCH 039/792] smoke-test: API test for managing secrets privilege
(#9121)
---
.../tests/privileges/test_privileges.py | 241 ++++++++++++++++++
smoke-test/tests/privileges/utils.py | 218 ++++++++++++++++
2 files changed, 459 insertions(+)
create mode 100644 smoke-test/tests/privileges/test_privileges.py
create mode 100644 smoke-test/tests/privileges/utils.py
diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py
new file mode 100644
index 0000000000000..13d6b6cf3415a
--- /dev/null
+++ b/smoke-test/tests/privileges/test_privileges.py
@@ -0,0 +1,241 @@
+import pytest
+import tenacity
+
+from tests.utils import (get_frontend_session, wait_for_writes_to_sync, wait_for_healthcheck_util,
+ get_frontend_url, get_admin_credentials,get_sleep_info)
+from tests.privileges.utils import *
+
+sleep_sec, sleep_times = get_sleep_info()
+
+@pytest.fixture(scope="session")
+def wait_for_healthchecks():  # session-scoped fixture: calls wait_for_healthcheck_util() before yielding
+    wait_for_healthcheck_util()
+    yield
+
+
+@pytest.mark.dependency()
+def test_healthchecks(wait_for_healthchecks):
+    # Requesting the wait_for_healthchecks fixture does the actual work; the body is intentionally empty.
+    pass
+
+
+@pytest.fixture(scope="session")
+def admin_session(wait_for_healthchecks):  # yields get_frontend_session() once health checks have run
+    yield get_frontend_session()
+
+
+# No pytest.mark here: marks have no effect on fixtures; admin_session already waits for health checks.
+@pytest.fixture(scope="module", autouse=True)
+def privileges_and_test_user_setup(admin_session):
+    """Disable the 'All users' policies and create user "user" before the tests; undo both afterwards."""
+    # Disable 'All users' privileges
+    set_base_platform_privileges_policy_status("INACTIVE", admin_session)
+    set_view_dataset_sensitive_info_policy_status("INACTIVE", admin_session)
+    set_view_entity_profile_privileges_policy_status("INACTIVE", admin_session)
+    # Sleep for eventual consistency
+    wait_for_writes_to_sync()
+
+    # Create a new user
+    admin_session = create_user(admin_session, "user", "user")  # NOTE(review): presumably returns an admin session - confirm in utils
+
+    yield
+
+    # Remove test user
+    remove_user(admin_session, "urn:li:corpuser:user")
+
+    # Restore All users privileges
+    set_base_platform_privileges_policy_status("ACTIVE", admin_session)
+    set_view_dataset_sensitive_info_policy_status("ACTIVE", admin_session)
+    set_view_entity_profile_privileges_policy_status("ACTIVE", admin_session)
+
+    # Sleep for eventual consistency
+    wait_for_writes_to_sync()
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_can_create_secret(session, json, urn):
+    create_secret_success = session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=json)
+    create_secret_success.raise_for_status()
+    secret_data = create_secret_success.json()
+    # Retried by tenacity (up to 10 attempts, sleep_sec apart) until createSecret returns the expected urn.
+    assert secret_data
+    assert secret_data["data"]
+    assert secret_data["data"]["createSecret"]
+    assert secret_data["data"]["createSecret"] == urn
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_cant_create_secret(session, json):
+    create_secret_response = session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=json)
+    create_secret_response.raise_for_status()
+    create_secret_data = create_secret_response.json()
+    # Retried by tenacity until the GraphQL reply is a 403 UNAUTHORIZED error with a null createSecret.
+    assert create_secret_data["errors"][0]["extensions"]["code"] == 403
+    assert create_secret_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED"
+    assert create_secret_data["data"]["createSecret"] is None
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_can_create_ingestion_source(session, json):
+    create_ingestion_success = session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=json)
+    create_ingestion_success.raise_for_status()
+    ingestion_data = create_ingestion_success.json()
+    # Retried by tenacity (up to 10 attempts, sleep_sec apart) until createIngestionSource is truthy.
+    assert ingestion_data
+    assert ingestion_data["data"]
+    assert ingestion_data["data"]["createIngestionSource"]
+    assert ingestion_data["data"]["createIngestionSource"] is not None
+    # Callers in this file use the returned value as the new ingestion source's urn.
+    return ingestion_data["data"]["createIngestionSource"]
+
+
+@tenacity.retry(
+    stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_cant_create_ingestion_source(session, json):
+    create_source_response = session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=json)
+    create_source_response.raise_for_status()
+    create_source_data = create_source_response.json()
+    # Retried by tenacity until the reply is a 403 UNAUTHORIZED error with a null createIngestionSource.
+    assert create_source_data["errors"][0]["extensions"]["code"] == 403
+    assert create_source_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED"
+    assert create_source_data["data"]["createIngestionSource"] is None
+
+
+@pytest.mark.dependency(depends=["test_healthchecks"])
+def test_privilege_to_create_and_manage_secrets():
+    """Check that user "user" can create/delete a secret only while a MANAGE_SECRETS policy is assigned."""
+    (admin_user, admin_pass) = get_admin_credentials()
+    admin_session = login_as(admin_user, admin_pass)
+    user_session = login_as("user", "user")
+    secret_urn = "urn:li:dataHubSecret:TestSecretName"
+
+    # Verify new user can't create secrets
+    create_secret = {
+        "query": """mutation createSecret($input: CreateSecretInput!) {\n
+ createSecret(input: $input)\n}""",
+        "variables": {
+            "input":{
+                "name":"TestSecretName",
+                "value":"Test Secret Value",
+                "description":"Test Secret Description"
+            }
+        },
+    }
+    _ensure_cant_create_secret(user_session, create_secret)
+
+
+    # Assign privileges to the new user to manage secrets
+    policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session)
+
+    # Verify new user can create and manage secrets
+    # Create a secret
+    _ensure_can_create_secret(user_session, create_secret, secret_urn)
+
+
+    # Remove a secret
+    remove_secret = {
+        "query": """mutation deleteSecret($urn: String!) {\n
+ deleteSecret(urn: $urn)\n}""",
+        "variables": {
+            "urn": secret_urn
+        },
+    }
+
+    remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret)
+    remove_secret_response.raise_for_status()
+    secret_data = remove_secret_response.json()
+    # A successful deleteSecret is expected to return the urn of the removed secret.
+    assert secret_data
+    assert secret_data["data"]
+    assert secret_data["data"]["deleteSecret"]
+    assert secret_data["data"]["deleteSecret"] == secret_urn
+
+
+    # Remove the policy
+    remove_policy(policy_urn, admin_session)
+
+    # Ensure user can't create secret after policy is removed
+    _ensure_cant_create_secret(user_session, create_secret)
+
+
+@pytest.mark.dependency(depends=["test_healthchecks"])
+def test_privilege_to_create_and_manage_ingestion_source():
+    """Check that user "user" can create/edit/delete a source only while MANAGE_INGESTION is assigned."""
+    (admin_user, admin_pass) = get_admin_credentials()
+    admin_session = login_as(admin_user, admin_pass)
+    user_session = login_as("user", "user")
+
+    # Verify new user can't create ingestion source
+    create_ingestion_source = {
+        "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n
+ createIngestionSource(input: $input)\n}""",
+        "variables": {"input":{"type":"snowflake","name":"test","config":
+            {"recipe":
+                "{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}",
+            "executorId":"default","debugMode":False,"extraArgs":[]}}},
+    }
+
+    _ensure_cant_create_ingestion_source(user_session, create_ingestion_source)
+
+
+    # Assign privileges to the new user to manage ingestion source
+    policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session)
+
+    # Verify new user can create and manage ingestion source(edit, delete)
+    ingestion_source_urn = _ensure_can_create_ingestion_source(user_session, create_ingestion_source)
+
+    # Edit ingestion source
+    update_ingestion_source = {
+        "query": """mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {\n
+ updateIngestionSource(urn: $urn, input: $input)\n}""",
+        "variables": {"urn":ingestion_source_urn,
+            "input":{"type":"snowflake","name":"test updated",
+                "config":{"recipe":"{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}",
+                    "executorId":"default","debugMode":False,"extraArgs":[]}}}
+    }
+
+    update_ingestion_success = user_session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source)
+    update_ingestion_success.raise_for_status()
+    ingestion_data = update_ingestion_success.json()
+    # A successful updateIngestionSource is expected to return the urn of the edited source.
+    assert ingestion_data
+    assert ingestion_data["data"]
+    assert ingestion_data["data"]["updateIngestionSource"]
+    assert ingestion_data["data"]["updateIngestionSource"] == ingestion_source_urn
+
+
+    # Delete ingestion source
+    remove_ingestion_source = {
+        "query": """mutation deleteIngestionSource($urn: String!) {\n
+ deleteIngestionSource(urn: $urn)\n}""",
+        "variables": {
+            "urn": ingestion_source_urn
+        },
+    }
+
+    remove_ingestion_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source)
+    remove_ingestion_response.raise_for_status()
+    ingestion_data = remove_ingestion_response.json()
+    # A successful deleteIngestionSource is expected to return the urn of the removed source.
+    assert ingestion_data
+    assert ingestion_data["data"]
+    assert ingestion_data["data"]["deleteIngestionSource"]
+    assert ingestion_data["data"]["deleteIngestionSource"] == ingestion_source_urn
+
+    # Remove the policy
+    remove_policy(policy_urn, admin_session)
+
+    # Ensure that user can't create ingestion source after policy is removed
+    _ensure_cant_create_ingestion_source(user_session, create_ingestion_source)
\ No newline at end of file
diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py
new file mode 100644
index 0000000000000..ea1f565f6f5ac
--- /dev/null
+++ b/smoke-test/tests/privileges/utils.py
@@ -0,0 +1,218 @@
+import requests_wrapper as requests
+from tests.consistency_utils import wait_for_writes_to_sync
+from tests.utils import (get_frontend_url, wait_for_writes_to_sync, get_admin_credentials)
+
+
+def set_base_platform_privileges_policy_status(status, session):
+    """Set the state of the "All Users - Base Platform Privileges" policy.
+
+    Posts an updatePolicy mutation for urn:li:dataHubPolicy:7 via ``session``
+    with ``status`` as the policy state; asserts the mutation echoes that urn.
+    """
+    policy_urn = "urn:li:dataHubPolicy:7"
+    mutation = """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n
+ updatePolicy(urn: $urn, input: $input) }"""
+    granted_privileges = [
+        "MANAGE_INGESTION",
+        "MANAGE_SECRETS",
+        "MANAGE_USERS_AND_GROUPS",
+        "VIEW_ANALYTICS",
+        "GENERATE_PERSONAL_ACCESS_TOKENS",
+        "MANAGE_DOMAINS",
+        "MANAGE_GLOBAL_ANNOUNCEMENTS",
+        "MANAGE_TESTS",
+        "MANAGE_GLOSSARIES",
+        "MANAGE_TAGS",
+        "MANAGE_GLOBAL_VIEWS",
+        "MANAGE_GLOBAL_OWNERSHIP_TYPES",
+    ]
+    # The policy targets every user (allUsers) rather than named users or groups.
+    everyone = {"users": [], "groups": None, "resourceOwners": False,
+                "allUsers": True, "allGroups": False, "resourceOwnersTypes": None}
+    policy_input = {
+        "type": "PLATFORM",
+        "state": status,
+        "name": "All Users - Base Platform Privileges",
+        "description": "Grants base platform privileges to ALL users of DataHub. Change this policy to alter that behavior.",
+        "privileges": granted_privileges,
+        "actors": everyone,
+    }
+    payload = {"query": mutation, "variables": {"urn": policy_urn, "input": policy_input}}
+    response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    response.raise_for_status()
+    result = response.json()
+    assert result["data"]["updatePolicy"] == policy_urn
+
+def set_view_dataset_sensitive_info_policy_status(status, session):
+    """Set the state of the "All Users - View Dataset Sensitive Information" policy.
+
+    Posts an updatePolicy mutation for urn:li:dataHubPolicy:view-dataset-sensitive
+    via ``session`` with ``status`` as the state; asserts the mutation echoes that urn.
+    """
+    policy_urn = "urn:li:dataHubPolicy:view-dataset-sensitive"
+    mutation = """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n
+ updatePolicy(urn: $urn, input: $input) }"""
+    # The policy targets every user (allUsers) rather than named users or groups.
+    everyone = {"users": [], "groups": None, "resourceOwners": False,
+                "allUsers": True, "allGroups": False, "resourceOwnersTypes": None}
+    policy_input = {
+        "type": "METADATA",
+        "state": status,
+        "name": "All Users - View Dataset Sensitive Information",
+        "description": "Grants viewing privileges of usage and profile information of all datasets for all users",
+        "privileges": ["VIEW_DATASET_USAGE", "VIEW_DATASET_PROFILE"],
+        "actors": everyone,
+    }
+    payload = {
+        "query": mutation,
+        "variables": {"urn": policy_urn, "input": policy_input},
+    }
+    response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    response.raise_for_status()
+    result = response.json()
+    assert result["data"]["updatePolicy"] == policy_urn
+
+def set_view_entity_profile_privileges_policy_status(status, session):
+    """Set the state of the "All Users - View Entity Page" policy.
+
+    Posts an updatePolicy mutation for urn:li:dataHubPolicy:view-entity-page-all
+    via ``session`` with ``status`` as the state; asserts the mutation echoes that urn.
+    """
+    policy_urn = "urn:li:dataHubPolicy:view-entity-page-all"
+    mutation = """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n
+ updatePolicy(urn: $urn, input: $input) }"""
+    granted_privileges = [
+        "VIEW_ENTITY_PAGE",
+        "SEARCH_PRIVILEGE",
+        "GET_COUNTS_PRIVILEGE",
+        "GET_TIMESERIES_ASPECT_PRIVILEGE",
+        "GET_ENTITY_PRIVILEGE",
+        "GET_TIMELINE_PRIVILEGE",
+    ]
+    # The policy targets every user (allUsers) rather than named users or groups.
+    everyone = {"users": [], "groups": None, "resourceOwners": False,
+                "allUsers": True, "allGroups": False, "resourceOwnersTypes": None}
+    policy_input = {
+        "type": "METADATA",
+        "state": status,
+        "name": "All Users - View Entity Page",
+        "description": "Grants entity view to all users",
+        "privileges": granted_privileges,
+        "actors": everyone,
+    }
+    payload = {"query": mutation, "variables": {"urn": policy_urn, "input": policy_input}}
+    response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    response.raise_for_status()
+    result = response.json()
+    assert result["data"]["updatePolicy"] == policy_urn
+
+def create_user(session, email, password):
+    """Recreate the corp user for ``email`` via an invite token; return an admin session.
+
+    Any existing user with that urn is removed first. After sign-up the cookies
+    of ``session`` are cleared and a new session, logged in with the admin
+    credentials, is returned.
+    """
+    # Start from a clean slate: drop the user if it already exists.
+    removal = remove_user(session, f"urn:li:corpuser:{email}")
+    assert removal
+    assert "error" not in removal
+    # Fetch an invite token to sign up with.
+    token_query = """query getInviteToken($input: GetInviteTokenInput!) {\n
+ getInviteToken(input: $input){\n
+ inviteToken\n
+ }\n
+ }"""
+    token_request = {"query": token_query, "variables": {"input": {}}}
+    token_response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=token_request)
+    token_response.raise_for_status()
+    token_data = token_response.json()
+    invite_token = token_data["data"]["getInviteToken"]["inviteToken"]
+    assert invite_token is not None
+    assert "error" not in invite_token
+    # Sign the new user up with that token.
+    sign_up_request = {
+        "fullName": "Test User",
+        "email": email,
+        "password": password,
+        "title": "Data Engineer",
+        "inviteToken": invite_token,
+    }
+    sign_up_response = session.post(f"{get_frontend_url()}/signUp", json=sign_up_request)
+    sign_up_response.raise_for_status()
+    assert sign_up_response
+    # NOTE(review): this tests membership on the response object itself, not on its body text - confirm the intended check.
+    assert "error" not in sign_up_response
+    wait_for_writes_to_sync()
+    session.cookies.clear()
+    admin_user, admin_pass = get_admin_credentials()
+    return login_as(admin_user, admin_pass)
+
+
+def login_as(username, password):
+    """Log in through the frontend /logIn endpoint and return the session used for the request."""
+    session = requests.Session()
+    headers = {"Content-Type": "application/json"}
+    # Pass the credentials as a dict so the HTTP client serializes them; building the
+    # JSON by string concatenation breaks when a value contains a quote or backslash.
+    response = session.post(f"{get_frontend_url()}/logIn", headers=headers, json={"username": username, "password": password})
+    response.raise_for_status()
+    return session
+
+def remove_user(session, urn):
+    """Send a removeUser mutation for ``urn`` via ``session``; return the decoded JSON reply."""
+    mutation = """mutation removeUser($urn: String!) {\n
+ removeUser(urn: $urn)
+ }"""
+    payload = {"query": mutation, "variables": {"urn": urn}}
+    reply = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    reply.raise_for_status()
+    # The reply body is returned without inspection; callers check it themselves.
+    return reply.json()
+
+def create_user_policy(user_urn, privileges, session):
+    """Create an active PLATFORM policy granting ``privileges`` to ``user_urn``.
+
+    Returns the value of ``createPolicy`` from the mutation response, after
+    asserting that the response, its ``data`` and that value are all truthy.
+    """
+    mutation = """mutation createPolicy($input: PolicyUpdateInput!) {\n
+ createPolicy(input: $input) }"""
+    # Only the given user is an actor; owners, all-users and all-groups are off.
+    only_this_user = {
+        "users": [user_urn],
+        "resourceOwners": False,
+        "allUsers": False,
+        "allGroups": False,
+    }
+    policy_input = {
+        "type": "PLATFORM",
+        "name": "Policy Name",
+        "description": "Policy Description",
+        "state": "ACTIVE",
+        "resources": {"filter": {"criteria": []}},
+        "privileges": privileges,
+        "actors": only_this_user,
+    }
+    payload = {"query": mutation, "variables": {"input": policy_input}}
+    reply = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    reply.raise_for_status()
+    body = reply.json()
+    assert body and body["data"] and body["data"]["createPolicy"]
+    return body["data"]["createPolicy"]
+
+def remove_policy(urn, session):
+    """Delete the policy ``urn`` via ``session`` and assert the mutation echoes the urn."""
+    mutation = """mutation deletePolicy($urn: String!) {\n
+ deletePolicy(urn: $urn) }"""
+    payload = {"query": mutation, "variables": {"urn": urn}}
+    reply = session.post(f"{get_frontend_url()}/api/v2/graphql", json=payload)
+    reply.raise_for_status()
+    body = reply.json()
+
+    # Check each level of the response before comparing the deleted urn.
+    assert body
+    assert body["data"]
+    deleted = body["data"]["deletePolicy"]
+    assert deleted
+    assert deleted == urn
\ No newline at end of file
From 95d9ff2cc2b71c5062454f6da1eca5084d6dd6eb Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Thu, 2 Nov 2023 01:44:16 +0530
Subject: [PATCH 040/792] fix(ingest): handle exceptions in min, max, mean
profiling (#9129)
---
.../ingestion/source/ge_data_profiler.py | 36 +++++++++++++++++--
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
index 9f6ac9dd21164..6b97d2eb456da 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -406,22 +406,52 @@ def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None:
def _get_dataset_column_min(
self, column_profile: DatasetFieldProfileClass, column: str
) -> None:
- if self.config.include_field_min_value:
+ if not self.config.include_field_min_value:
+ return
+ try:
column_profile.min = str(self.dataset.get_column_min(column))
+ except Exception as e:
+ logger.debug(
+ f"Caught exception while attempting to get column min for column {column}. {e}"
+ )
+ self.report.report_warning(
+ "Profiling - Unable to get column min",
+ f"{self.dataset_name}.{column}",
+ )
@_run_with_query_combiner
def _get_dataset_column_max(
self, column_profile: DatasetFieldProfileClass, column: str
) -> None:
- if self.config.include_field_max_value:
+ if not self.config.include_field_max_value:
+ return
+ try:
column_profile.max = str(self.dataset.get_column_max(column))
+ except Exception as e:
+ logger.debug(
+ f"Caught exception while attempting to get column max for column {column}. {e}"
+ )
+ self.report.report_warning(
+ "Profiling - Unable to get column max",
+ f"{self.dataset_name}.{column}",
+ )
@_run_with_query_combiner
def _get_dataset_column_mean(
self, column_profile: DatasetFieldProfileClass, column: str
) -> None:
- if self.config.include_field_mean_value:
+ if not self.config.include_field_mean_value:
+ return
+ try:
column_profile.mean = str(self.dataset.get_column_mean(column))
+ except Exception as e:
+ logger.debug(
+ f"Caught exception while attempting to get column mean for column {column}. {e}"
+ )
+ self.report.report_warning(
+ "Profiling - Unable to get column mean",
+ f"{self.dataset_name}.{column}",
+ )
@_run_with_query_combiner
def _get_dataset_column_median(
From 932eebea353cf6f31bc489428feb54b43d647075 Mon Sep 17 00:00:00 2001
From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com>
Date: Thu, 2 Nov 2023 01:50:15 +0530
Subject: [PATCH 041/792] feat: rename Assets tab to Owner Of (#9141)
Co-authored-by: John Joyce
---
datahub-web-react/src/app/entity/group/GroupProfile.tsx | 2 +-
datahub-web-react/src/app/entity/user/UserProfile.tsx | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx
index 53d2062277dec..11ed31e00003f 100644
--- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx
+++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx
@@ -16,7 +16,7 @@ import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage';
const messageStyle = { marginTop: '10%' };
export enum TabType {
- Assets = 'Assets',
+ Assets = 'Owner Of',
Members = 'Members',
}
diff --git a/datahub-web-react/src/app/entity/user/UserProfile.tsx b/datahub-web-react/src/app/entity/user/UserProfile.tsx
index 1d20072c4ea8f..e8284ba61afe4 100644
--- a/datahub-web-react/src/app/entity/user/UserProfile.tsx
+++ b/datahub-web-react/src/app/entity/user/UserProfile.tsx
@@ -17,7 +17,7 @@ export interface Props {
}
export enum TabType {
- Assets = 'Assets',
+ Assets = 'Owner Of',
Groups = 'Groups',
}
const ENABLED_TAB_TYPES = [TabType.Assets, TabType.Groups];
From 50789224a12e0f48d6b4ca2ef3876498f7738d9e Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 1 Nov 2023 16:58:37 -0700
Subject: [PATCH 042/792] fix(ingest/mongodb): fix schema inference for lists
of values (#9145)
---
.../datahub/ingestion/source/schema_inference/object.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
index 5797d66aa4d19..b58bdf41ccaa5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
@@ -16,7 +16,7 @@ class SchemaDescription(BasicSchemaDescription):
nullable: bool # if field is ever missing
-def is_field_nullable(doc: Dict[str, Any], field_path: Tuple) -> bool:
+def is_field_nullable(doc: Dict[str, Any], field_path: Tuple[str, ...]) -> bool:
"""
Check if a nested field is nullable in a document from a collection.
@@ -54,7 +54,10 @@ def is_field_nullable(doc: Dict[str, Any], field_path: Tuple) -> bool:
# count empty lists of nested objects as nullable
if len(value) == 0:
return True
- return any(is_field_nullable(x, remaining_fields) for x in doc[field])
+ return any(
+ isinstance(x, dict) and is_field_nullable(x, remaining_fields)
+ for x in doc[field]
+ )
# any other types to check?
# raise ValueError("Nested type not 'list' or 'dict' encountered")
From f7cd80283ad768afe14e3cf53b9c38fe912be570 Mon Sep 17 00:00:00 2001
From: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com>
Date: Thu, 2 Nov 2023 09:16:58 +0530
Subject: [PATCH 043/792] fix(ingest/db2): fix handling for table properties
(#9128)
Co-authored-by: Harshal Sheth
---
.../src/datahub/ingestion/source/sql/sql_common.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 51909eaf4ed55..80f828e9ea2fd 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -20,6 +20,7 @@
import sqlalchemy.dialects.postgresql.base
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.row import LegacyRow
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.sql import sqltypes as types
from sqlalchemy.types import TypeDecorator, TypeEngine
@@ -784,7 +785,7 @@ def get_table_properties(
table_info: dict = inspector.get_table_comment(table, f'"{schema}"') # type: ignore
description = table_info.get("text")
- if type(description) is tuple:
+ if isinstance(description, LegacyRow):
# Handling for value type tuple which is coming for dialect 'db2+ibm_db'
description = table_info["text"][0]
From bab9d1c93196b7a7181525609e854640c9a13712 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 1 Nov 2023 20:47:18 -0700
Subject: [PATCH 044/792] fix(ingest): fully support MCPs in urn_iter primitive
(#9157)
---
.../src/datahub/utilities/urns/urn_iter.py | 16 +++++++-------
.../tests/unit/serde/test_urn_iterator.py | 21 +++++++++++++------
2 files changed, 23 insertions(+), 14 deletions(-)
diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
index e13d439161064..169a4ac3649a3 100644
--- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
+++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py
@@ -117,17 +117,17 @@ def _modify_at_path(
if isinstance(path[0], int):
assert isinstance(model, list)
model[path[0]] = new_value
- elif isinstance(model, MetadataChangeProposalWrapper):
- setattr(model, path[0], new_value)
- else:
- assert isinstance(model, DictWrapper)
+ elif isinstance(model, DictWrapper):
model._inner_dict[path[0]] = new_value
+ else: # MCPW
+ setattr(model, path[0], new_value)
elif isinstance(path[0], int):
assert isinstance(model, list)
- return _modify_at_path(model[path[0]], path[1:], new_value)
- else:
- assert isinstance(model, DictWrapper)
- return _modify_at_path(model._inner_dict[path[0]], path[1:], new_value)
+ _modify_at_path(model[path[0]], path[1:], new_value)
+ elif isinstance(model, DictWrapper):
+ _modify_at_path(model._inner_dict[path[0]], path[1:], new_value)
+ else: # MCPW
+ _modify_at_path(getattr(model, path[0]), path[1:], new_value)
def _lowercase_dataset_urn(dataset_urn: str) -> str:
diff --git a/metadata-ingestion/tests/unit/serde/test_urn_iterator.py b/metadata-ingestion/tests/unit/serde/test_urn_iterator.py
index 9657ac45068da..135580dcdff13 100644
--- a/metadata-ingestion/tests/unit/serde/test_urn_iterator.py
+++ b/metadata-ingestion/tests/unit/serde/test_urn_iterator.py
@@ -1,4 +1,5 @@
import datahub.emitter.mce_builder as builder
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
DatasetLineageTypeClass,
FineGrainedLineage,
@@ -10,11 +11,11 @@
from datahub.utilities.urns.urn_iter import list_urns_with_path, lowercase_dataset_urns
-def _datasetUrn(tbl):
+def _datasetUrn(tbl: str) -> str:
return builder.make_dataset_urn("bigquery", tbl, "PROD")
-def _fldUrn(tbl, fld):
+def _fldUrn(tbl: str, fld: str) -> str:
return builder.make_schema_field_urn(_datasetUrn(tbl), fld)
@@ -114,8 +115,10 @@ def test_upstream_lineage_urn_iterator():
]
-def _make_test_lineage_obj(upstream: str, downstream: str) -> UpstreamLineage:
- return UpstreamLineage(
+def _make_test_lineage_obj(
+ table: str, upstream: str, downstream: str
+) -> MetadataChangeProposalWrapper:
+ lineage = UpstreamLineage(
upstreams=[
Upstream(
dataset=_datasetUrn(upstream),
@@ -132,11 +135,17 @@ def _make_test_lineage_obj(upstream: str, downstream: str) -> UpstreamLineage:
],
)
+ return MetadataChangeProposalWrapper(entityUrn=_datasetUrn(table), aspect=lineage)
+
def test_dataset_urn_lowercase_transformer():
- original = _make_test_lineage_obj("upstreamTable", "downstreamTable")
+ original = _make_test_lineage_obj(
+ "mainTableName", "upstreamTable", "downstreamTable"
+ )
- expected = _make_test_lineage_obj("upstreamtable", "downstreamtable")
+ expected = _make_test_lineage_obj(
+ "maintablename", "upstreamtable", "downstreamtable"
+ )
assert original != expected # sanity check
From 12b41713b46ab474f0d55ea81fe0e854526036a9 Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:05:24 +0530
Subject: [PATCH 045/792] =?UTF-8?q?fix(ingest/bigquery):=20use=20correct?=
=?UTF-8?q?=20row=20count=20in=20null=20count=20profiling=20c=E2=80=A6=20(?=
=?UTF-8?q?#9123)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Harshal Sheth
Co-authored-by: Aseem Bansal
---
.../datahub/ingestion/source/ge_data_profiler.py | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
index 6b97d2eb456da..c334a97680e3e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -659,7 +659,16 @@ def generate_dataset_profile( # noqa: C901 (complexity)
self.query_combiner.flush()
assert profile.rowCount is not None
- row_count: int = profile.rowCount
+ row_count: int # used for null counts calculation
+ if profile.partitionSpec and "SAMPLE" in profile.partitionSpec.partition:
+ # We can alternatively use `self._get_dataset_rows(profile)` to get
+ # exact count of rows in sample, as actual rows involved in sample
+ # may be slightly different (more or less) than configured `sample_size`.
+ # However not doing so to start with, as that adds another query overhead
+ # plus approximate metrics should work for sampling based profiling.
+ row_count = self.config.sample_size
+ else:
+ row_count = profile.rowCount
for column_spec in columns_profiling_queue:
column = column_spec.column
@@ -811,7 +820,7 @@ def update_dataset_batch_use_sampling(self, profile: DatasetProfileClass) -> Non
sample_pc = 100 * self.config.sample_size / profile.rowCount
sql = (
f"SELECT * FROM {str(self.dataset._table)} "
- + f"TABLESAMPLE SYSTEM ({sample_pc:.3f} percent)"
+ + f"TABLESAMPLE SYSTEM ({sample_pc:.8f} percent)"
)
temp_table_name = create_bigquery_temp_table(
self,
From 11d8988868357b956e7b6ccfa905689d6163f814 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Thu, 2 Nov 2023 17:46:27 +0900
Subject: [PATCH 046/792] docs: add feature guides for subscriptions and
notifications (#9122)
---
docs-website/sidebars.js | 5 +
docs/managed-datahub/saas-slack-setup.md | 4 +-
.../subscription-and-notification.md | 130 ++++++++++++++++++
3 files changed, 137 insertions(+), 2 deletions(-)
create mode 100644 docs/managed-datahub/subscription-and-notification.md
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index 39eaea57444ed..ab4c1311d5fc7 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -625,6 +625,11 @@ module.exports = {
type: "doc",
id: "docs/managed-datahub/chrome-extension",
},
+ {
+ type: "doc",
+ id: "docs/managed-datahub/subscription-and-notification",
+ className: "saasOnly",
+ },
{
"Managed DataHub Release History": [
"docs/managed-datahub/release-notes/v_0_2_12",
diff --git a/docs/managed-datahub/saas-slack-setup.md b/docs/managed-datahub/saas-slack-setup.md
index 68f947f171715..8d4519b878cd8 100644
--- a/docs/managed-datahub/saas-slack-setup.md
+++ b/docs/managed-datahub/saas-slack-setup.md
@@ -1,6 +1,6 @@
import FeatureAvailability from '@site/src/components/FeatureAvailability';
-# Configure Slack Notifications
+# Configure Slack For Notifications
@@ -108,4 +108,4 @@ For now we support sending notifications to
- Click on “More”
- Click on “Copy member ID”
-
+
\ No newline at end of file
diff --git a/docs/managed-datahub/subscription-and-notification.md b/docs/managed-datahub/subscription-and-notification.md
new file mode 100644
index 0000000000000..b30a03de16511
--- /dev/null
+++ b/docs/managed-datahub/subscription-and-notification.md
@@ -0,0 +1,130 @@
+import FeatureAvailability from '@site/src/components/FeatureAvailability';
+
+# Subscriptions & Notifications
+
+
+
+DataHub's Subscriptions and Notifications feature gives you real-time change alerts on data assets of your choice.
+With this feature, you can set up subscriptions to specific changes for an Entity – and DataHub will notify you when those changes happen. Currently, DataHub supports notifications on Slack, with support for Microsoft Teams and email subscriptions forthcoming.
+
+
+
+
+
+This feature is especially useful in helping you stay on top of any upstream changes that could impact the assets you or your stakeholders rely on. It eliminates the need for you and your team to manually check for upstream changes, or for upstream stakeholders to identify and notify impacted users.
+As a user, you can subscribe to and receive notifications about changes such as deprecations, schema changes, changes in ownership, assertions, or incidents. You’ll always be in the know about potential data quality issues so you can proactively manage your data resources.
+
+## Prerequisites
+
+Once you have [configured Slack within your DataHub instance](saas-slack-setup.md), you will be able to subscribe to any Entity in DataHub and begin receiving notifications via DM.
+If you want to create and manage group-level Subscriptions for your team, you will need [the following privileges](../../docs/authorization/roles.md#role-privileges):
+
+- Manage Group Notification Settings
+- Manage Group Subscriptions
+
+## Using DataHub’s Subscriptions and Notifications Feature
+
+The first step is identifying the assets you want to subscribe to.
+DataHub’s [Lineage and Impact Analysis features](../../docs/act-on-metadata/impact-analysis.md#lineage-impact-analysis-setup-prerequisites-and-permissions) can help you identify upstream entities that could impact the assets you use and are responsible for.
+You can use the Subscriptions and Notifications feature to sign up for updates for your entire team, or just for yourself.
+
+### Subscribing Your Team/Group to Notifications
+
+The dropdown menu next to the Subscribe button lets you choose who the subscription is for. To create a group subscription, click on Manage Group Subscriptions.
+
+
+
+
+
+Next, customize the group’s subscriptions by selecting the types of changes you want the group to be notified about.
+
+
+
+
+
+Connect to Slack. Currently, Acryl's Subscriptions and Notifications feature integrates only with Slack. Add your group’s Slack Channel ID to receive notifications on Slack.
+(You can find your Channel ID in the About section of your channel on Slack.)
+
+
+
+
+
+### Individually Subscribing to an Entity
+
+Select the **Subscribe Me** option in the Subscriptions dropdown menu.
+
+
+
+
+
+Pick the updates you want to be notified about, and connect your Slack account by using your Slack Member ID.
+
+
+
+
+
+:::note
+You can find your Slack Member ID in your profile settings.
+
+
+
+
+:::
+
+### Managing Your Subscriptions
+
+You can enable, disable, or manage notifications at any time to ensure that you receive relevant updates.
+
+Simply use the dropdown menu next to the Subscribe button to unsubscribe from the asset, or to manage/modify your subscription (say, to modify the changes you want to be updated about).
+
+
+
+
+
+You can also view and manage your subscriptions in your DataHub settings page.
+
+
+
+
+
+You can view and manage the group’s subscriptions on the group’s page on DataHub.
+
+
+
+
+
+## FAQ
+
+
+
+What changes can I be notified about using this feature?
+
+You can subscribe to deprecations, Assertion status changes, Incident status changes, Schema changes, Ownership changes, Glossary Term changes, and Tag changes.
+
+
+
+
+
+
+
+What if I no longer want to receive updates about a data asset?
+
+You can unsubscribe from any asset to stop receiving notifications about it. On the asset’s DataHub page, simply use the dropdown menu next to the Subscribe button to unsubscribe from the asset.
+
+
+
+
+
+
+
+
+What if I want to be notified about different changes?
+
+To modify your subscription, use the dropdown menu next to the Subscribe button to modify the changes you want to be notified about.
+
+
+## Reference
+
+- [DataHub Blog - Simplifying Data Monitoring & Management with Subscriptions and Notifications with Acryl DataHub](https://www.acryldata.io/blog/simplifying-data-monitoring-and-management-with-subscriptions-and-notifications-with-acryl-datahub)
+- Video Guide - Getting Started with Subscription & Notifications
+
From f42cb95b928c071b8309cf7c3e9a0fe8b41d3a90 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Thu, 2 Nov 2023 17:46:49 +0900
Subject: [PATCH 047/792] docs: unify oidc guides using tabs (#9068)
Co-authored-by: Harshal Sheth
---
docs-website/sidebars.js | 11 +-
.../guides/sso/configure-oidc-behind-proxy.md | 18 +-
.../guides/sso/configure-oidc-react-azure.md | 127 -------
.../guides/sso/configure-oidc-react-google.md | 118 ------
.../guides/sso/configure-oidc-react-okta.md | 124 ------
.../guides/sso/configure-oidc-react.md | 355 +++++++++++++-----
6 files changed, 263 insertions(+), 490 deletions(-)
delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-azure.md
delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-google.md
delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-okta.md
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index ab4c1311d5fc7..9cc035f3e29e0 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -171,15 +171,8 @@ module.exports = {
{
"Frontend Authentication": [
"docs/authentication/guides/jaas",
- {
- "OIDC Authentication": [
- "docs/authentication/guides/sso/configure-oidc-react",
- "docs/authentication/guides/sso/configure-oidc-react-google",
- "docs/authentication/guides/sso/configure-oidc-react-okta",
- "docs/authentication/guides/sso/configure-oidc-react-azure",
- "docs/authentication/guides/sso/configure-oidc-behind-proxy",
- ],
- },
+ "docs/authentication/guides/sso/configure-oidc-react",
+ "docs/authentication/guides/sso/configure-oidc-behind-proxy",
],
},
"docs/authentication/introducing-metadata-service-authentication",
diff --git a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md
index c998816e04735..684bf768f2baf 100644
--- a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md
+++ b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md
@@ -1,8 +1,9 @@
-# Configuring Frontend to use a Proxy when communicating with SSO Provider
-*Authored on 22/08/2023*
+# OIDC Proxy Configuration
-The `datahub-frontend-react` server can be configured to use an http proxy when retrieving the openid-configuration.
-This can be needed if your infrastructure is locked down and disallows connectivity by default, using proxies for fine-grained egress control.
+_Authored on 22/08/2023_
+
+The `datahub-frontend-react` server can be configured to use an http proxy when retrieving the openid-configuration.
+This can be needed if your infrastructure is locked down and disallows connectivity by default, using proxies for fine-grained egress control.
## Configure http proxy and non proxy hosts
@@ -17,7 +18,8 @@ HTTP_NON_PROXY_HOSTS=localhost|datahub-gms (or any other hosts that you would li
```
## Optional: provide custom truststore
-If your upstream proxy performs SSL termination to inspect traffic, this will result in different (self-signed) certificates for HTTPS connections.
+
+If your upstream proxy performs SSL termination to inspect traffic, this will result in different (self-signed) certificates for HTTPS connections.
The default truststore used in the `datahub-frontend-react` docker image will not trust these kinds of connections.
To address this, you can copy or mount your own truststore (provided by the proxy or network administrators) into the docker container.
@@ -36,8 +38,8 @@ FROM linkedin/datahub-frontend-react:
COPY /truststore-directory /certificates
```
-Building this Dockerfile will result in your own custom docker image on your local machine.
-You will then be able to tag it, publish it to your own registry, etc.
+Building this Dockerfile will result in your own custom docker image on your local machine.
+You will then be able to tag it, publish it to your own registry, etc.
#### Option b) Mount truststore from your host machine using a docker volume
@@ -51,7 +53,7 @@ Adapt your docker-compose.yml to include a new volume mount in the `datahub-fron
- /truststore-directory:/certificates
```
-### Reference new truststore
+### Reference new truststore
Add the following environment values to the `datahub-frontend-react` container:
diff --git a/docs/authentication/guides/sso/configure-oidc-react-azure.md b/docs/authentication/guides/sso/configure-oidc-react-azure.md
deleted file mode 100644
index 177387327c0e8..0000000000000
--- a/docs/authentication/guides/sso/configure-oidc-react-azure.md
+++ /dev/null
@@ -1,127 +0,0 @@
-# Configuring Azure Authentication for React App (OIDC)
-*Authored on 21/12/2021*
-
-`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to
-delegate authentication responsibility to identity providers like Microsoft Azure.
-
-This guide will provide steps for configuring DataHub authentication using Microsoft Azure.
-
-:::caution
-Even when OIDC is configured, the root user can still login without OIDC by going
-to `/login` URL endpoint. It is recommended that you don't use the default
-credentials by mounting a different file in the front end container. To do this
-please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment.
-:::
-
-## Steps
-
-### 1. Create an application registration in Microsoft Azure portal
-
-a. Using an account linked to your organization, navigate to the [Microsoft Azure Portal](https://portal.azure.com).
-
-b. Select **App registrations**, then **New registration** to register a new app.
-
-c. Name your app registration and choose who can access your application.
-
-d. Select `Web` as the **Redirect URI** type and enter the following:
-```
-https://your-datahub-domain.com/callback/oidc
-```
-If you are just testing locally, the following can be used: `http://localhost:9002/callback/oidc`.
-Azure supports more than one redirect URI, so both can be configured at the same time from the **Authentication** tab once the registration is complete.
-
-At this point, your app registration should look like the following:
-
-
-
-
-
-
-
-e. Click **Register**.
-
-### 2. Configure Authentication (optional)
-
-Once registration is done, you will land on the app registration **Overview** tab. On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments).
-
-
-
-
-
-
-
-Click **Save**.
-
-### 3. Configure Certificates & secrets
-
-On the left-side navigation bar, click on **Certificates & secrets** under **Manage**.
-Select **Client secrets**, then **New client secret**. Type in a meaningful description for your secret and select an expiry. Click the **Add** button when you are done.
-
-**IMPORTANT:** Copy the `value` of your newly create secret since Azure will never display its value afterwards.
-
-
-
-
-
-
-
-### 4. Configure API permissions
-
-On the left-side navigation bar, click on **API permissions** under **Manage**. DataHub requires the following four Microsoft Graph APIs:
-
-1. `User.Read` *(should be already configured)*
-2. `profile`
-3. `email`
-4. `openid`
-
-Click on **Add a permission**, then from the **Microsoft APIs** tab select **Microsoft Graph**, then **Delegated permissions**. From the **OpenId permissions** category, select `email`, `openid`, `profile` and click **Add permissions**.
-
-At this point, you should be looking at a screen like the following:
-
-
-
-
-
-
-
-### 5. Obtain Application (Client) ID
-
-On the left-side navigation bar, go back to the **Overview** tab. You should see the `Application (client) ID`. Save its value for the next step.
-
-### 6. Obtain Discovery URI
-
-On the same page, you should see a `Directory (tenant) ID`. Your OIDC discovery URI will be formatted as follows:
-
-```
-https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration
-```
-
-### 7. Configure `datahub-frontend` to enable OIDC authentication
-
-a. Open the file `docker/datahub-frontend/env/docker.env`
-
-b. Add the following configuration values to the file:
-
-```
-AUTH_OIDC_ENABLED=true
-AUTH_OIDC_CLIENT_ID=your-client-id
-AUTH_OIDC_CLIENT_SECRET=your-client-secret
-AUTH_OIDC_DISCOVERY_URI=https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration
-AUTH_OIDC_BASE_URL=your-datahub-url
-AUTH_OIDC_SCOPE="openid profile email"
-```
-
-Replacing the placeholders above with the client id (step 5), client secret (step 3) and tenant ID (step 6) received from Microsoft Azure.
-
-### 9. Restart `datahub-frontend-react` docker container
-
-Now, simply restart the `datahub-frontend-react` container to enable the integration.
-
-```
-docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react
-```
-
-Navigate to your DataHub domain to see SSO in action.
-
-## Resources
-- [Microsoft identity platform and OpenID Connect protocol](https://docs.microsoft.com/en-us/azure/active-directory/develop/v2-protocols-oidc/)
\ No newline at end of file
diff --git a/docs/authentication/guides/sso/configure-oidc-react-google.md b/docs/authentication/guides/sso/configure-oidc-react-google.md
deleted file mode 100644
index af62185e6e787..0000000000000
--- a/docs/authentication/guides/sso/configure-oidc-react-google.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Configuring Google Authentication for React App (OIDC)
-*Authored on 3/10/2021*
-
-`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to delegate
-authentication responsibility to identity providers like Google.
-
-This guide will provide steps for configuring DataHub authentication using Google.
-
-:::caution
-Even when OIDC is configured, the root user can still login without OIDC by going
-to `/login` URL endpoint. It is recommended that you don't use the default
-credentials by mounting a different file in the front end container. To do this
-please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment.
-:::
-
-## Steps
-
-### 1. Create a project in the Google API Console
-
-Using an account linked to your organization, navigate to the [Google API Console](https://console.developers.google.com/) and select **New project**.
-Within this project, we will configure the OAuth2.0 screen and credentials.
-
-### 2. Create OAuth2.0 consent screen
-
-a. Navigate to `OAuth consent screen`. This is where you'll configure the screen your users see when attempting to
-log in to DataHub.
-
-b. Select `Internal` (if you only want your company users to have access) and then click **Create**.
-Note that in order to complete this step you should be logged into a Google account associated with your organization.
-
-c. Fill out the details in the App Information & Domain sections. Make sure the 'Application Home Page' provided matches where DataHub is deployed
-at your organization.
-
-
-
-
-
-
-
-Once you've completed this, **Save & Continue**.
-
-d. Configure the scopes: Next, click **Add or Remove Scopes**. Select the following scopes:
-
- - `.../auth/userinfo.email`
- - `.../auth/userinfo.profile`
- - `openid`
-
-Once you've selected these, **Save & Continue**.
-
-### 3. Configure client credentials
-
-Now navigate to the **Credentials** tab. This is where you'll obtain your client id & secret, as well as configure info
-like the redirect URI used after a user is authenticated.
-
-a. Click **Create Credentials** & select `OAuth client ID` as the credential type.
-
-b. On the following screen, select `Web application` as your Application Type.
-
-c. Add the domain where DataHub is hosted to your 'Authorized Javascript Origins'.
-
-```
-https://your-datahub-domain.com
-```
-
-d. Add the domain where DataHub is hosted with the path `/callback/oidc` appended to 'Authorized Redirect URLs'.
-
-```
-https://your-datahub-domain.com/callback/oidc
-```
-
-e. Click **Create**
-
-f. You will now receive a pair of values, a client id and a client secret. Bookmark these for the next step.
-
-At this point, you should be looking at a screen like the following:
-
-
-
-
-
-
-
-Success!
-
-### 4. Configure `datahub-frontend` to enable OIDC authentication
-
-a. Open the file `docker/datahub-frontend/env/docker.env`
-
-b. Add the following configuration values to the file:
-
-```
-AUTH_OIDC_ENABLED=true
-AUTH_OIDC_CLIENT_ID=your-client-id
-AUTH_OIDC_CLIENT_SECRET=your-client-secret
-AUTH_OIDC_DISCOVERY_URI=https://accounts.google.com/.well-known/openid-configuration
-AUTH_OIDC_BASE_URL=your-datahub-url
-AUTH_OIDC_SCOPE="openid profile email"
-AUTH_OIDC_USER_NAME_CLAIM=email
-AUTH_OIDC_USER_NAME_CLAIM_REGEX=([^@]+)
-```
-
-Replacing the placeholders above with the client id & client secret received from Google in Step 3f.
-
-
-### 5. Restart `datahub-frontend-react` docker container
-
-Now, simply restart the `datahub-frontend-react` container to enable the integration.
-
-```
-docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react
-```
-
-Navigate to your DataHub domain to see SSO in action.
-
-
-## References
-
-- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect)
\ No newline at end of file
diff --git a/docs/authentication/guides/sso/configure-oidc-react-okta.md b/docs/authentication/guides/sso/configure-oidc-react-okta.md
deleted file mode 100644
index 320b887a28f16..0000000000000
--- a/docs/authentication/guides/sso/configure-oidc-react-okta.md
+++ /dev/null
@@ -1,124 +0,0 @@
-# Configuring Okta Authentication for React App (OIDC)
-*Authored on 3/10/2021*
-
-`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to
-delegate authentication responsibility to identity providers like Okta.
-
-This guide will provide steps for configuring DataHub authentication using Okta.
-
-:::caution
-Even when OIDC is configured, the root user can still login without OIDC by going
-to `/login` URL endpoint. It is recommended that you don't use the default
-credentials by mounting a different file in the front end container. To do this
-please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment.
-:::
-
-## Steps
-
-### 1. Create an application in Okta Developer Console
-
-a. Log in to your Okta admin account & navigate to the developer console
-
-b. Select **Applications**, then **Add Application**, the **Create New App** to create a new app.
-
-c. Select `Web` as the **Platform**, and `OpenID Connect` as the **Sign on method**
-
-d. Click **Create**
-
-e. Under 'General Settings', name your application
-
-f. Below, add a **Login Redirect URI**. This should be formatted as
-
-```
-https://your-datahub-domain.com/callback/oidc
-```
-
-If you're just testing locally, this can be `http://localhost:9002/callback/oidc`.
-
-g. Below, add a **Logout Redirect URI**. This should be formatted as
-
-```
-https://your-datahub-domain.com
-```
-
-h. [Optional] If you're enabling DataHub login as an Okta tile, you'll need to provide the **Initiate Login URI**. You
-can set if to
-
-```
-https://your-datahub-domain.com/authenticate
-```
-
-If you're just testing locally, this can be `http://localhost:9002`.
-
-i. Click **Save**
-
-
-### 2. Obtain Client Credentials
-
-On the subsequent screen, you should see the client credentials. Bookmark the `Client id` and `Client secret` for the next step.
-
-### 3. Obtain Discovery URI
-
-On the same page, you should see an `Okta Domain`. Your OIDC discovery URI will be formatted as follows:
-
-```
-https://your-okta-domain.com/.well-known/openid-configuration
-```
-
-for example, `https://dev-33231928.okta.com/.well-known/openid-configuration`.
-
-At this point, you should be looking at a screen like the following:
-
-
-
-
-
-
-
-
-
-
-
-
-Success!
-
-### 4. Configure `datahub-frontend` to enable OIDC authentication
-
-a. Open the file `docker/datahub-frontend/env/docker.env`
-
-b. Add the following configuration values to the file:
-
-```
-AUTH_OIDC_ENABLED=true
-AUTH_OIDC_CLIENT_ID=your-client-id
-AUTH_OIDC_CLIENT_SECRET=your-client-secret
-AUTH_OIDC_DISCOVERY_URI=https://your-okta-domain.com/.well-known/openid-configuration
-AUTH_OIDC_BASE_URL=your-datahub-url
-AUTH_OIDC_SCOPE="openid profile email groups"
-```
-
-Replacing the placeholders above with the client id & client secret received from Okta in Step 2.
-
-> **Pro Tip!** You can easily enable Okta to return the groups that a user is associated with, which will be provisioned in DataHub, along with the user logging in. This can be enabled by setting the `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` flag to `true`.
-> if they do not already exist in DataHub. You can enable your Okta application to return a 'groups' claim from the Okta Console at Applications > Your Application -> Sign On -> OpenID Connect ID Token Settings (Requires an edit).
->
-> By default, we assume that the groups will appear in a claim named "groups". This can be customized using the `AUTH_OIDC_GROUPS_CLAIM` container configuration.
->
->
-
-
-
-
-
-### 5. Restart `datahub-frontend-react` docker container
-
-Now, simply restart the `datahub-frontend-react` container to enable the integration.
-
-```
-docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react
-```
-
-Navigate to your DataHub domain to see SSO in action.
-
-## Resources
-- [OAuth 2.0 and OpenID Connect Overview](https://developer.okta.com/docs/concepts/oauth-openid/)
diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md
index 1671673c09318..9b4af80bb0ccd 100644
--- a/docs/authentication/guides/sso/configure-oidc-react.md
+++ b/docs/authentication/guides/sso/configure-oidc-react.md
@@ -1,59 +1,201 @@
-# Overview
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# OIDC Authentication
The DataHub React application supports OIDC authentication built on top of the [Pac4j Play](https://github.com/pac4j/play-pac4j) library.
This enables operators of DataHub to integrate with 3rd party identity providers like Okta, Google, Keycloak, & more to authenticate their users.
-When configured, OIDC auth will be enabled between clients of the DataHub UI & `datahub-frontend` server. Beyond this point is considered
-to be a secure environment and as such authentication is validated & enforced only at the "front door" inside datahub-frontend.
+## 1. Register an app with your Identity Provider
-:::caution
-Even if OIDC is configured the root user can still login without OIDC by going
-to `/login` URL endpoint. It is recommended that you don't use the default
-credentials by mounting a different file in the front end container. To do this
-please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment.
+
+
+
+#### Create a project in the Google API Console
+
+Using an account linked to your organization, navigate to the [Google API Console](https://console.developers.google.com/) and select **New project**.
+Within this project, we will configure the OAuth2.0 screen and credentials.
+
+#### Create OAuth2.0 consent screen
+
+Navigate to **OAuth consent screen**. This is where you'll configure the screen your users see when attempting to
+log in to DataHub. Select **Internal** (if you only want your company users to have access) and then click **Create**.
+Note that in order to complete this step you should be logged into a Google account associated with your organization.
+
+Fill out the details in the App Information & Domain sections. Make sure the 'Application Home Page' provided matches where DataHub is deployed
+at your organization. Once you've completed this, **Save & Continue**.
+
+
+
+
+
+#### Configure the scopes
+
+Next, click **Add or Remove Scopes**. Select the following scopes and click **Save & Continue**.
+
+- .../auth/userinfo.email
+- .../auth/userinfo.profile
+- openid
+
+
+
+
+#### Create an application in Okta Developer Console
+
+Log in to your Okta admin account & navigate to the developer console. Select **Applications**, then **Add Application**, then **Create New App** to create a new app.
+Select `Web` as the **Platform**, and `OpenID Connect` as the **Sign on method**.
+
+Click **Create** and name your application under **General Settings** and save.
+
+- **Login Redirect URI** : `https://your-datahub-domain.com/callback/oidc`.
+- **Logout Redirect URI** : `https://your-datahub-domain.com`
+
+
+
+
+
+:::note Optional
+If you're enabling DataHub login as an Okta tile, you'll need to provide the **Initiate Login URI**. You
+can set it to `https://your-datahub-domain.com/authenticate`. If you're just testing locally, this can be `http://localhost:9002`.
:::
-## Provider-Specific Guides
+
+
-1. [Configuring OIDC using Google](configure-oidc-react-google.md)
-2. [Configuring OIDC using Okta](configure-oidc-react-okta.md)
-3. [Configuring OIDC using Azure](configure-oidc-react-azure.md)
+#### Create an application registration in Microsoft Azure portal
-## Configuring OIDC in React
+Using an account linked to your organization, navigate to the [Microsoft Azure Portal](https://portal.azure.com). Select **App registrations**, then **New registration** to register a new app.
-### 1. Register an app with your Identity Provider
+Name your app registration and choose who can access your application.
-To configure OIDC in React, you will most often need to register yourself as a client with your identity provider (Google, Okta, etc). Each provider may
-have their own instructions. Provided below are links to examples for Okta, Google, Azure AD, & Keycloak.
+- **Redirect URI** : Select **Web** as type and enter `https://your-datahub-domain.com/callback/oidc`
-- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/)
-- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect)
-- [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc)
-- [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/)
+Azure supports more than one redirect URI, so both can be configured at the same time from the **Authentication** tab once the registration is complete.
+At this point, your app registration should look like the following. Finally, click **Register**.
+
+
+
+
-During the registration process, you'll need to provide a login redirect URI to the identity provider. This tells the identity provider
-where to redirect to once they've authenticated the end user.
+:::note Optional
+Once registration is done, you will land on the app registration **Overview** tab.
+On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments). Finally, click **Save**.
-By default, the URL will be constructed as follows:
+
+
+
-> "http://your-datahub-domain.com/callback/oidc"
+:::
+
+#### Configure Certificates & secrets
+
+On the left-side navigation bar, click on **Certificates & secrets** under **Manage**.
+Select **Client secrets**, then **New client secret**. Type in a meaningful description for your secret and select an expiry. Click the **Add** button when you are done.
+Copy the value of your newly created secret since Azure will never display its value afterwards.
+
+
+
+
+
+#### Configure API permissions
+
+On the left-side navigation bar, click on **API permissions** under **Manage**. DataHub requires the following four Microsoft Graph APIs:
-For example, if you're hosted DataHub at `datahub.myorg.com`, this
-value would be `http://datahub.myorg.com/callback/oidc`. For testing purposes you can also specify localhost as the domain name
-directly: `http://localhost:9002/callback/oidc`
+- User.Read _(should be already configured)_
+- profile
+- email
+- openid
+
+Click on **Add a permission**, then from the **Microsoft APIs** tab select **Microsoft Graph**, then **Delegated permissions**. From the **OpenId permissions** category, select `email`, `openid`, `profile` and click **Add permissions**.
+
+At this point, you should be looking at a screen like the following:
+
+
+
+
+
+
+
+
+## 2. Obtain Client Credentials & Discovery URL
The goal of this step should be to obtain the following values, which will need to be configured before deploying DataHub:
-1. **Client ID** - A unique identifier for your application with the identity provider
-2. **Client Secret** - A shared secret to use for exchange between you and your identity provider
-3. **Discovery URL** - A URL where the OIDC API of your identity provider can be discovered. This should suffixed by
- `.well-known/openid-configuration`. Sometimes, identity providers will not explicitly include this URL in their setup guides, though
- this endpoint *will* exist as per the OIDC specification. For more info see http://openid.net/specs/openid-connect-discovery-1_0.html.
+- **Client ID** - A unique identifier for your application with the identity provider
+- **Client Secret** - A shared secret to use for exchange between you and your identity provider
+- **Discovery URL** - A URL where the OIDC API of your identity provider can be discovered. This should be suffixed by
+ `.well-known/openid-configuration`. Sometimes, identity providers will not explicitly include this URL in their setup guides, though
+ this endpoint _will_ exist as per the OIDC specification. For more info see http://openid.net/specs/openid-connect-discovery-1_0.html.
+
+
+
+
+
+**Obtain Client Credentials**
+
+Navigate to the **Credentials** tab. Click **Create Credentials** & select **OAuth client ID** as the credential type.
+
+On the following screen, select **Web application** as your Application Type.
+Add the domain where DataHub is hosted to your 'Authorized Javascript Origins'.
+
+```
+https://your-datahub-domain.com
+```
+
+Add the domain where DataHub is hosted with the path `/callback/oidc` appended to 'Authorized Redirect URLs'. Finally, click **Create**
+
+```
+https://your-datahub-domain.com/callback/oidc
+```
+
+You will now receive a pair of values, a client id and a client secret. Bookmark these for the next step.
+
+
+
+
+**Obtain Client Credentials**
+
+After registering the app, you should see the client credentials. Bookmark the `Client id` and `Client secret` for the next step.
+
+**Obtain Discovery URI**
+
+On the same page, you should see an `Okta Domain`. Your OIDC discovery URI will be formatted as follows:
+
+```
+https://your-okta-domain.com/.well-known/openid-configuration
+```
+
+For example, `https://dev-33231928.okta.com/.well-known/openid-configuration`.
+
+At this point, you should be looking at a screen like the following:
+
+
+
+
+
+
-### 2. Configure DataHub Frontend Server
+**Obtain Application (Client) ID**
-The second step to enabling OIDC involves configuring `datahub-frontend` to enable OIDC authentication with your Identity Provider.
+On the left-side navigation bar, go back to the **Overview** tab. You should see the `Application (client) ID`. Save its value for the next step.
+
+**Obtain Discovery URI**
+
+On the same page, you should see a `Directory (tenant) ID`. Your OIDC discovery URI will be formatted as follows:
+
+```
+https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration
+```
+
+
+
+
+## 3. Configure DataHub Frontend Server
+
+### Docker
+
+The next step to enabling OIDC involves configuring `datahub-frontend` to enable OIDC authentication with your Identity Provider.
To do so, you must update the `datahub-frontend` [docker.env](../../../../docker/datahub-frontend/env/docker.env) file with the
values received from your identity provider:
@@ -67,22 +209,29 @@ AUTH_OIDC_DISCOVERY_URI=your-provider-discovery-url
AUTH_OIDC_BASE_URL=your-datahub-url
```
-- `AUTH_OIDC_ENABLED`: Enable delegating authentication to OIDC identity provider
-- `AUTH_OIDC_CLIENT_ID`: Unique client id received from identity provider
-- `AUTH_OIDC_CLIENT_SECRET`: Unique client secret received from identity provider
-- `AUTH_OIDC_DISCOVERY_URI`: Location of the identity provider OIDC discovery API. Suffixed with `.well-known/openid-configuration`
-- `AUTH_OIDC_BASE_URL`: The base URL of your DataHub deployment, e.g. https://yourorgdatahub.com (prod) or http://localhost:9002 (testing)
-- `AUTH_SESSION_TTL_HOURS`: The length of time in hours before a user will be prompted to login again. Controls the actor cookie expiration time in the browser. Numeric value converted to hours, default 24.
-- `MAX_SESSION_TOKEN_AGE`: Determines the expiration time of a session token. Session tokens are stateless so this determines at what time a session token may no longer be used and a valid session token can be used until this time has passed. Accepts a valid relative Java date style String, default 24h.
+| Configuration | Description | Default |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- |
+| AUTH_OIDC_ENABLED | Enable delegating authentication to OIDC identity provider | |
+| AUTH_OIDC_CLIENT_ID | Unique client id received from identity provider | |
+| AUTH_OIDC_CLIENT_SECRET | Unique client secret received from identity provider | |
+| AUTH_OIDC_DISCOVERY_URI | Location of the identity provider OIDC discovery API. Suffixed with `.well-known/openid-configuration` | |
+| AUTH_OIDC_BASE_URL | The base URL of your DataHub deployment, e.g. https://yourorgdatahub.com (prod) or http://localhost:9002 (testing) | |
+| AUTH_SESSION_TTL_HOURS | The length of time in hours before a user will be prompted to login again. Controls the actor cookie expiration time in the browser. Numeric value converted to hours. | 24 |
+| MAX_SESSION_TOKEN_AGE | Determines the expiration time of a session token. Session tokens are stateless so this determines at what time a session token may no longer be used and a valid session token can be used until this time has passed. Accepts a valid relative Java date style String. | 24h |
Providing these configs will cause DataHub to delegate authentication to your identity
provider, requesting the "oidc email profile" scopes and parsing the "preferred_username" claim from
the authenticated profile as the DataHub CorpUser identity.
+:::note
+
+By default, the login callback endpoint exposed by DataHub will be located at `${AUTH_OIDC_BASE_URL}/callback/oidc`. This must **exactly** match the login redirect URL you've registered with your identity provider in step 1.
+
+:::
-> By default, the login callback endpoint exposed by DataHub will be located at `${AUTH_OIDC_BASE_URL}/callback/oidc`. This must **exactly** match the login redirect URL you've registered with your identity provider in step 1.
+### Kubernetes
-In kubernetes, you can add the above env variables in the values.yaml as follows.
+In Kubernetes, you can add the above env variables in the `values.yaml` as follows.
```yaml
datahub-frontend:
@@ -102,20 +251,21 @@ datahub-frontend:
You can also package OIDC client secrets into a k8s secret by running
-```kubectl create secret generic datahub-oidc-secret --from-literal=secret=<>```
+```
+kubectl create secret generic datahub-oidc-secret --from-literal=secret=<>
+```
Then set the secret env as follows.
```yaml
- - name: AUTH_OIDC_CLIENT_SECRET
- valueFrom:
- secretKeyRef:
- name: datahub-oidc-secret
- key: secret
+- name: AUTH_OIDC_CLIENT_SECRET
+ valueFrom:
+ secretKeyRef:
+ name: datahub-oidc-secret
+ key: secret
```
-
-#### Advanced
+### Advanced OIDC Configurations
You can optionally customize the flow further using advanced configurations. These allow
you to specify the OIDC scopes requested, how the DataHub username is parsed from the claims returned by the identity provider, and how users and groups are extracted and provisioned from the OIDC claim set.
@@ -128,23 +278,15 @@ AUTH_OIDC_SCOPE=your-custom-scope
AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD=authentication-method
```
-- `AUTH_OIDC_USER_NAME_CLAIM`: The attribute that will contain the username used on the DataHub platform. By default, this is "email" provided
- as part of the standard `email` scope.
-- `AUTH_OIDC_USER_NAME_CLAIM_REGEX`: A regex string used for extracting the username from the userNameClaim attribute. For example, if
- the userNameClaim field will contain an email address, and we want to omit the domain name suffix of the email, we can specify a custom
- regex to do so. (e.g. `([^@]+)`)
-- `AUTH_OIDC_SCOPE`: a string representing the scopes to be requested from the identity provider, granted by the end user. For more info,
- see [OpenID Connect Scopes](https://auth0.com/docs/scopes/openid-connect-scopes).
-- `AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD`: a string representing the token authentication method to use with the identity provider. Default value
- is `client_secret_basic`, which uses HTTP Basic authentication. Another option is `client_secret_post`, which includes the client_id and secret_id
- as form parameters in the HTTP POST request. For more info, see [OAuth 2.0 Client Authentication](https://darutk.medium.com/oauth-2-0-client-authentication-4b5f929305d4)
-
-Additional OIDC Options:
+| Configuration | Description | Default |
+| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------- |
+| AUTH_OIDC_USER_NAME_CLAIM | The attribute that will contain the username used on the DataHub platform. By default, this is "email" provided as part of the standard `email` scope. | |
+| AUTH_OIDC_USER_NAME_CLAIM_REGEX | A regex string used for extracting the username from the userNameClaim attribute. For example, if the userNameClaim field will contain an email address, and we want to omit the domain name suffix of the email, we can specify a custom regex to do so. (e.g. `([^@]+)`) | |
+| AUTH_OIDC_SCOPE | A string representing the scopes to be requested from the identity provider, granted by the end user. For more info, see [OpenID Connect Scopes](https://auth0.com/docs/scopes/openid-connect-scopes). | |
+| AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD | A string representing the token authentication method to use with the identity provider. Default value is `client_secret_basic`, which uses HTTP Basic authentication. Another option is `client_secret_post`, which includes the client_id and client_secret as form parameters in the HTTP POST request. For more info, see [OAuth 2.0 Client Authentication](https://darutk.medium.com/oauth-2-0-client-authentication-4b5f929305d4) | client_secret_basic |
+| AUTH_OIDC_PREFERRED_JWS_ALGORITHM | Can be used to select a preferred signing algorithm for id tokens. Examples include: `RS256` or `HS256`. If your IdP includes `none` before `RS256`/`HS256` in the list of signing algorithms, then this value **MUST** be set. | |
-- `AUTH_OIDC_PREFERRED_JWS_ALGORITHM` - Can be used to select a preferred signing algorithm for id tokens. Examples include: `RS256` or `HS256`. If
-your IdP includes `none` before `RS256`/`HS256` in the list of signing algorithms, then this value **MUST** be set.
-
-##### User & Group Provisioning (JIT Provisioning)
+### User & Group Provisioning (JIT Provisioning)
By default, DataHub will optimistically attempt to provision users and groups that do not already exist at the time of login.
For users, we extract information like first name, last name, display name, & email to construct a basic user profile. If a groups claim is present,
@@ -160,26 +302,30 @@ AUTH_OIDC_EXTRACT_GROUPS_ENABLED=false
AUTH_OIDC_GROUPS_CLAIM=
```
-- `AUTH_OIDC_JIT_PROVISIONING_ENABLED`: Whether DataHub users & groups should be provisioned on login if they do not exist. Defaults to true.
-- `AUTH_OIDC_PRE_PROVISIONING_REQUIRED`: Whether the user should already exist in DataHub when they login, failing login if they are not. This is appropriate for situations in which users and groups are batch ingested and tightly controlled inside your environment. Defaults to false.
-- `AUTH_OIDC_EXTRACT_GROUPS_ENABLED`: Only applies if `AUTH_OIDC_JIT_PROVISIONING_ENABLED` is set to true. This determines whether we should attempt to extract a list of group names from a particular claim in the OIDC attributes. Note that if this is enabled, each login will re-sync group membership with the groups in your Identity Provider, clearing the group membership that has been assigned through the DataHub UI. Enable with care! Defaults to false.
-- `AUTH_OIDC_GROUPS_CLAIM`: Only applies if `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` is set to true. This determines which OIDC claims will contain a list of string group names. Accepts multiple claim names with comma-separated values. I.e: `groups, teams, departments`. Defaults to 'groups'.
+| Configuration | Description | Default |
+| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
+| AUTH_OIDC_JIT_PROVISIONING_ENABLED | Whether DataHub users & groups should be provisioned on login if they do not exist. | true |
+| AUTH_OIDC_PRE_PROVISIONING_REQUIRED | Whether the user should already exist in DataHub when they log in, failing login if they do not. This is appropriate for situations in which users and groups are batch ingested and tightly controlled inside your environment. | false |
+| AUTH_OIDC_EXTRACT_GROUPS_ENABLED | Only applies if `AUTH_OIDC_JIT_PROVISIONING_ENABLED` is set to true. This determines whether we should attempt to extract a list of group names from a particular claim in the OIDC attributes. Note that if this is enabled, each login will re-sync group membership with the groups in your Identity Provider, clearing the group membership that has been assigned through the DataHub UI. Enable with care! | false |
+| AUTH_OIDC_GROUPS_CLAIM | Only applies if `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` is set to true. This determines which OIDC claims will contain a list of string group names. Accepts multiple claim names with comma-separated values, e.g. `groups, teams, departments`. | groups |
+## 4. Restart datahub-frontend-react
-Once configuration has been updated, `datahub-frontend-react` will need to be restarted to pick up the new environment variables:
+Once configured, restarting the `datahub-frontend-react` container will enable an indirect authentication flow in which DataHub delegates authentication to the specified identity provider.
```
docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react
```
->Note that by default, enabling OIDC will *not* disable the dummy JAAS authentication path, which can be reached at the `/login`
-route of the React app. To disable this authentication path, additionally specify the following config:
-> `AUTH_JAAS_ENABLED=false`
+Navigate to your DataHub domain to see SSO in action.
-### Summary
+:::caution
+By default, enabling OIDC will _not_ disable the dummy JAAS authentication path, which can be reached at the `/login`
+route of the React app. To disable this authentication path, additionally specify the following config:
+`AUTH_JAAS_ENABLED=false`
+:::
-Once configured, deploying the `datahub-frontend-react` container will enable an indirect authentication flow in which DataHub delegates
-authentication to the specified identity provider.
+## Summary
Once a user is authenticated by the identity provider, DataHub will extract a username from the provided claims
and grant DataHub access to the user by setting a pair of session cookies.
@@ -196,44 +342,45 @@ A brief summary of the steps that occur when the user navigates to the React app
7. DataHub sets session cookies for the newly authenticated user
8. DataHub redirects the user to the homepage ("/")
-## FAQ
+## Troubleshooting
-**No users can log in. Instead, I get redirected to the login page with an error. What do I do?**
+
+No users can log in. Instead, I get redirected to the login page with an error. What do I do?
This can occur for a variety of reasons, but most often it is due to misconfiguration of Single-Sign On, either on the DataHub
-side or on the Identity Provider side.
-
-First, verify that all values are consistent across them (e.g. the host URL where DataHub is deployed), and that no values
-are misspelled (client id, client secret).
+side or on the Identity Provider side.
-Next, verify that the scopes requested are supported by your Identity Provider
-and that the claim (i.e. attribute) DataHub uses for uniquely identifying the user is supported by your Identity Provider (refer to Identity Provider OpenID Connect documentation). By default, this claim is `email`.
+- Verify that all values are consistent between DataHub and the Identity Provider (e.g. the host URL where DataHub is deployed), and that no values are misspelled (client id, client secret).
+- Verify that the scopes requested are supported by your Identity Provider and that the claim (i.e. attribute) DataHub uses for uniquely identifying the user is supported by your Identity Provider (refer to Identity Provider OpenID Connect documentation). By default, this claim is `email`.
+- Make sure the Discovery URI you've configured (`AUTH_OIDC_DISCOVERY_URI`) is accessible where the datahub-frontend container is running. You can do this by issuing a basic CURL to the address (**Pro-Tip**: you may also visit the address in your browser to check more specific details about your Identity Provider).
+- Check the container logs for the `datahub-frontend` container. This should hopefully provide some additional context around why exactly the login handoff is not working.
-Then, make sure the Discovery URI you've configured (`AUTH_OIDC_DISCOVERY_URI`) is accessible where the datahub-frontend container is running. You
-can do this by issuing a basic CURL to the address (**Pro-Tip**: you may also visit the address in your browser to check more specific details about your Identity Provider).
+If all else fails, feel free to reach out to the DataHub Community on Slack for real-time support.
-Finally, check the container logs for the `datahub-frontend` container. This should hopefully provide some additional context
-around why exactly the login handoff is not working.
+
-If all else fails, feel free to reach out to the DataHub Community on Slack for
-real-time support
-
-
-
-**I'm seeing an error in the `datahub-frontend` logs when a user tries to login**
-```shell
-Caused by: java.lang.RuntimeException: Failed to resolve user name claim from profile provided by Identity Provider. Missing attribute. Attribute: 'email', Regex: '(.*)', Profile: { ...
-```
-**what do I do?**
+
+
+I'm seeing an error in the `datahub-frontend` logs when a user tries to login: Caused by: java.lang.RuntimeException: Failed to resolve user name claim from profile provided by Identity Provider. Missing attribute. Attribute: 'email', Regex: '(.*)', Profile: { ....
+
This indicates that your Identity Provider does not provide the claim with name 'email', which DataHub
uses by default to uniquely identify users within your organization.
-To fix this, you may need to
+To fix this, you may need to
-1. Change the claim that is used as the unique user identifier to something else by changing the `AUTH_OIDC_USER_NAME_CLAIM` (e.g. to "name" or "preferred_username") _OR_
+1. Change the claim that is used as the unique user identifier to something else by changing the `AUTH_OIDC_USER_NAME_CLAIM` (e.g. to `name` or `preferred_username`) **OR**
2. Change the environment variable `AUTH_OIDC_SCOPE` to include the scope required to retrieve the claim with name "email"
-For the `datahub-frontend` container / pod.
+For the `datahub-frontend` container / pod.
+
+
+
+## Reference
-**Pro-Tip**: Check the documentation for your Identity Provider to learn more about the scope claims supported.
+Check the documentation for your Identity Provider to learn more about the scope claims supported.
+
+- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/)
+- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect)
+- [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc)
+- [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/)
From ec9725026dca7b89d6a6464ea9b5c547debf42e5 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 2 Nov 2023 09:39:08 -0700
Subject: [PATCH 048/792] chore(ingest): remove legacy memory_leak_detector
(#9158)
---
.../src/datahub/cli/ingest_cli.py | 4 -
metadata-ingestion/src/datahub/entrypoints.py | 15 ---
.../ingestion/source/looker/looker_config.py | 6 +-
.../datahub/utilities/memory_leak_detector.py | 106 ------------------
.../tests/integration/snowflake/common.py | 3 +-
.../tests/unit/test_snowflake_source.py | 15 +--
6 files changed, 10 insertions(+), 139 deletions(-)
delete mode 100644 metadata-ingestion/src/datahub/utilities/memory_leak_detector.py
diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py
index 9b5716408f3e4..dd0287004a368 100644
--- a/metadata-ingestion/src/datahub/cli/ingest_cli.py
+++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py
@@ -27,7 +27,6 @@
from datahub.ingestion.run.pipeline import Pipeline
from datahub.telemetry import telemetry
from datahub.upgrade import upgrade
-from datahub.utilities import memory_leak_detector
logger = logging.getLogger(__name__)
@@ -98,7 +97,6 @@ def ingest() -> None:
@click.option(
"--no-spinner", type=bool, is_flag=True, default=False, help="Turn off spinner"
)
-@click.pass_context
@telemetry.with_telemetry(
capture_kwargs=[
"dry_run",
@@ -109,9 +107,7 @@ def ingest() -> None:
"no_spinner",
]
)
-@memory_leak_detector.with_leak_detection
def run(
- ctx: click.Context,
config: str,
dry_run: bool,
preview: bool,
diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py
index 5bfab3b841fa3..0cd37cc939854 100644
--- a/metadata-ingestion/src/datahub/entrypoints.py
+++ b/metadata-ingestion/src/datahub/entrypoints.py
@@ -70,21 +70,10 @@
version=datahub_package.nice_version_name(),
prog_name=datahub_package.__package_name__,
)
-@click.option(
- "-dl",
- "--detect-memory-leaks",
- type=bool,
- is_flag=True,
- default=False,
- help="Run memory leak detection.",
-)
-@click.pass_context
def datahub(
- ctx: click.Context,
debug: bool,
log_file: Optional[str],
debug_vars: bool,
- detect_memory_leaks: bool,
) -> None:
if debug_vars:
# debug_vars implies debug. This option isn't actually used here, but instead
@@ -109,10 +98,6 @@ def datahub(
_logging_configured = configure_logging(debug=debug, log_file=log_file)
_logging_configured.__enter__()
- # Setup the context for the memory_leak_detector decorator.
- ctx.ensure_object(dict)
- ctx.obj["detect_memory_leaks"] = detect_memory_leaks
-
@datahub.command()
@telemetry.with_telemetry()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
index 96c405f7257d0..98d58c9fc9d87 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
@@ -121,7 +121,10 @@ class LookerCommonConfig(DatasetSourceConfigMixin):
"discoverable. When disabled, adds this information to the description of the column.",
)
platform_name: str = Field(
- "looker", description="Default platform name. Don't change."
+ # TODO: This shouldn't be part of the config.
+ "looker",
+ description="Default platform name.",
+ hidden_from_docs=True,
)
extract_column_level_lineage: bool = Field(
True,
@@ -213,7 +216,6 @@ def external_url_defaults_to_api_config_base_url(
def stateful_ingestion_should_be_enabled(
cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any]
) -> Optional[bool]:
-
stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast(
StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion")
)
diff --git a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py b/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py
deleted file mode 100644
index 85ad0fb4938eb..0000000000000
--- a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import fnmatch
-import gc
-import logging
-import sys
-import tracemalloc
-from collections import defaultdict
-from functools import wraps
-from typing import Any, Callable, Dict, List, TypeVar, Union, cast
-
-import click
-from typing_extensions import Concatenate, ParamSpec
-
-logger = logging.getLogger(__name__)
-T = TypeVar("T")
-P = ParamSpec("P")
-
-
-def _trace_has_file(trace: tracemalloc.Traceback, file_pattern: str) -> bool:
- for frame_index in range(len(trace)):
- cur_frame = trace[frame_index]
- if fnmatch.fnmatch(cur_frame.filename, file_pattern):
- return True
- return False
-
-
-def _init_leak_detection() -> None:
- # Initialize trace malloc to track up to 25 stack frames.
- tracemalloc.start(25)
- if sys.version_info >= (3, 9):
- # Nice to reset peak to 0. Available for versions >= 3.9.
- tracemalloc.reset_peak()
- # Enable leak debugging in the garbage collector.
- gc.set_debug(gc.DEBUG_LEAK)
-
-
-def _perform_leak_detection() -> None:
- # Log potentially useful memory usage metrics
- logger.info(f"GC count before collect {gc.get_count()}")
- traced_memory_size, traced_memory_peak = tracemalloc.get_traced_memory()
- logger.info(f"Traced Memory: size={traced_memory_size}, peak={traced_memory_peak}")
- num_unreacheable_objects = gc.collect()
- logger.info(f"Number of unreachable objects = {num_unreacheable_objects}")
- logger.info(f"GC count after collect {gc.get_count()}")
-
- # Collect unique traces of all live objects in the garbage - these have potential leaks.
- unique_traces_to_objects: Dict[
- Union[tracemalloc.Traceback, int], List[object]
- ] = defaultdict(list)
- for obj in gc.garbage:
- obj_trace = tracemalloc.get_object_traceback(obj)
- if obj_trace is not None:
- if _trace_has_file(obj_trace, "*datahub/*.py"):
- # Leaking object
- unique_traces_to_objects[obj_trace].append(obj)
- else:
- unique_traces_to_objects[id(obj)].append(obj)
- logger.info("Potentially leaking objects start")
- for key, obj_list in sorted(
- unique_traces_to_objects.items(),
- key=lambda item: sum(
- [sys.getsizeof(o) for o in item[1]]
- ), # TODO: add support for deep sizeof
- reverse=True,
- ):
- if isinstance(key, tracemalloc.Traceback):
- obj_traceback: tracemalloc.Traceback = cast(tracemalloc.Traceback, key)
- logger.info(
- f"#Objects:{len(obj_list)}; Total memory:{sum([sys.getsizeof(obj) for obj in obj_list])};"
- + " Allocation Trace:\n\t"
- + "\n\t".join(obj_traceback.format(limit=25))
- )
- else:
- logger.info(
- f"#Objects:{len(obj_list)}; Total memory:{sum([sys.getsizeof(obj) for obj in obj_list])};"
- + " No Allocation Trace available!"
- )
- logger.info("Potentially leaking objects end")
-
- tracemalloc.stop()
-
-
-def with_leak_detection(
- func: Callable[Concatenate[click.Context, P], T]
-) -> Callable[Concatenate[click.Context, P], T]:
- @wraps(func)
- def wrapper(ctx: click.Context, *args: P.args, **kwargs: P.kwargs) -> Any:
- detect_leaks: bool = ctx.obj.get("detect_memory_leaks", False)
- if detect_leaks:
- logger.info(
- f"Initializing memory leak detection on command: {func.__module__}.{func.__name__}"
- )
- _init_leak_detection()
-
- try:
- return func(ctx, *args, **kwargs)
- finally:
- if detect_leaks:
- logger.info(
- f"Starting memory leak detection on command: {func.__module__}.{func.__name__}"
- )
- _perform_leak_detection()
- logger.info(
- f"Finished memory leak detection on command: {func.__module__}.{func.__name__}"
- )
-
- return wrapper
diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py
index ff448eca01071..78e5499697311 100644
--- a/metadata-ingestion/tests/integration/snowflake/common.py
+++ b/metadata-ingestion/tests/integration/snowflake/common.py
@@ -565,5 +565,4 @@ def default_query_results( # noqa: C901
"DOMAIN": "DATABASE",
},
]
- # Unreachable code
- raise Exception(f"Unknown query {query}")
+ raise ValueError(f"Unexpected query: {query}")
diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py
index 888a7c0441554..aaff878b81eee 100644
--- a/metadata-ingestion/tests/unit/test_snowflake_source.py
+++ b/metadata-ingestion/tests/unit/test_snowflake_source.py
@@ -368,8 +368,7 @@ def default_query_results(query):
return [('{"roles":"","value":""}',)]
elif query == "select current_warehouse()":
return [("TEST_WAREHOUSE")]
- # Unreachable code
- raise Exception()
+ raise ValueError(f"Unexpected query: {query}")
connection_mock = MagicMock()
cursor_mock = MagicMock()
@@ -397,8 +396,7 @@ def query_results(query):
]
elif query == 'show grants to role "PUBLIC"':
return []
- # Unreachable code
- raise Exception()
+ raise ValueError(f"Unexpected query: {query}")
config = {
"username": "user",
@@ -441,8 +439,7 @@ def query_results(query):
return [("", "USAGE", "DATABASE", "DB1")]
elif query == 'show grants to role "PUBLIC"':
return []
- # Unreachable code
- raise Exception()
+ raise ValueError(f"Unexpected query: {query}")
setup_mock_connect(mock_connect, query_results)
@@ -485,8 +482,7 @@ def query_results(query):
]
elif query == 'show grants to role "PUBLIC"':
return []
- # Unreachable code
- raise Exception()
+ raise ValueError(f"Unexpected query: {query}")
setup_mock_connect(mock_connect, query_results)
@@ -536,8 +532,7 @@ def query_results(query):
["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY"],
["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.OBJECT_DEPENDENCIES"],
]
- # Unreachable code
- raise Exception()
+ raise ValueError(f"Unexpected query: {query}")
setup_mock_connect(mock_connect, query_results)
From 148ad1ad9f00d6eb43d6acb270b9a90a745c8af3 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Thu, 2 Nov 2023 09:44:35 -0700
Subject: [PATCH 049/792] feat(ingest/looker): support emitting unused explores
(#9159)
---
.../ingestion/source/looker/looker_common.py | 2 +-
.../ingestion/source/looker/looker_config.py | 4 ++
.../source/looker/looker_lib_wrapper.py | 7 +++
.../ingestion/source/looker/looker_source.py | 46 +++++++++++++------
4 files changed, 45 insertions(+), 14 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index 30c38720dd96c..7ca5ce49019ab 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -388,7 +388,7 @@ def _get_field_type(
# if still not found, log and continue
if type_class is None:
- logger.info(
+ logger.debug(
f"The type '{native_type}' is not recognized for field type, setting as NullTypeClass.",
)
type_class = NullTypeClass
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
index 98d58c9fc9d87..e6ddea9a30489 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py
@@ -205,6 +205,10 @@ class LookerDashboardSourceConfig(
False,
description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion should also be enabled.",
)
+ emit_used_explores_only: bool = Field(
+ True,
+ description="When enabled, only explores that are used by a Dashboard/Look will be ingested.",
+ )
@validator("external_base_url", pre=True, always=True)
def external_url_defaults_to_api_config_base_url(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
index b00f74b71e792..988caba1c0d74 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
@@ -59,6 +59,7 @@ class LookerAPIStats(BaseModel):
lookml_model_calls: int = 0
all_dashboards_calls: int = 0
all_looks_calls: int = 0
+ all_models_calls: int = 0
get_query_calls: int = 0
search_looks_calls: int = 0
search_dashboards_calls: int = 0
@@ -155,6 +156,12 @@ def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboa
transport_options=self.transport_options,
)
+ def all_lookml_models(self) -> Sequence[LookmlModel]:
+ self.client_stats.all_models_calls += 1
+ return self.client.all_lookml_models(
+ transport_options=self.transport_options,
+ )
+
def lookml_model_explore(self, model: str, explore_name: str) -> LookmlModelExplore:
self.client_stats.explore_calls += 1
return self.client.lookml_model_explore(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index 09683d790c14c..4a98e8874bca0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -147,9 +147,12 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext):
)
self.reporter._looker_explore_registry = self.explore_registry
self.reporter._looker_api = self.looker_api
+
self.reachable_look_registry = set()
- self.explores_to_fetch_set: Dict[Tuple[str, str], List[str]] = {}
+ # (model, explore) -> list of charts/looks/dashboards that reference this explore
+ # The list values are used purely for debugging purposes.
+ self.reachable_explores: Dict[Tuple[str, str], List[str]] = {}
# Keep stat generators to generate entity stat aspect later
stat_generator_config: looker_usage.StatGeneratorConfig = (
@@ -378,11 +381,11 @@ def _get_input_fields_from_query(
return result
- def add_explore_to_fetch(self, model: str, explore: str, via: str) -> None:
- if (model, explore) not in self.explores_to_fetch_set:
- self.explores_to_fetch_set[(model, explore)] = []
+ def add_reachable_explore(self, model: str, explore: str, via: str) -> None:
+ if (model, explore) not in self.reachable_explores:
+ self.reachable_explores[(model, explore)] = []
- self.explores_to_fetch_set[(model, explore)].append(via)
+ self.reachable_explores[(model, explore)].append(via)
def _get_looker_dashboard_element( # noqa: C901
self, element: DashboardElement
@@ -403,7 +406,7 @@ def _get_looker_dashboard_element( # noqa: C901
f"Element {element.title}: Explores added via query: {explores}"
)
for exp in explores:
- self.add_explore_to_fetch(
+ self.add_reachable_explore(
model=element.query.model,
explore=exp,
via=f"look:{element.look_id}:query:{element.dashboard_id}",
@@ -439,7 +442,7 @@ def _get_looker_dashboard_element( # noqa: C901
explores = [element.look.query.view]
logger.debug(f"Element {title}: Explores added via look: {explores}")
for exp in explores:
- self.add_explore_to_fetch(
+ self.add_reachable_explore(
model=element.look.query.model,
explore=exp,
via=f"Look:{element.look_id}:query:{element.dashboard_id}",
@@ -483,7 +486,7 @@ def _get_looker_dashboard_element( # noqa: C901
)
for exp in explores:
- self.add_explore_to_fetch(
+ self.add_reachable_explore(
model=element.result_maker.query.model,
explore=exp,
via=f"Look:{element.look_id}:resultmaker:query",
@@ -495,7 +498,7 @@ def _get_looker_dashboard_element( # noqa: C901
if filterable.view is not None and filterable.model is not None:
model = filterable.model
explores.append(filterable.view)
- self.add_explore_to_fetch(
+ self.add_reachable_explore(
model=filterable.model,
explore=filterable.view,
via=f"Look:{element.look_id}:resultmaker:filterable",
@@ -694,20 +697,26 @@ def _make_dashboard_metadata_events(
def _make_explore_metadata_events(
self,
) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
+ if self.source_config.emit_used_explores_only:
+ explores_to_fetch = list(self.reachable_explores.keys())
+ else:
+ explores_to_fetch = list(self.list_all_explores())
+ explores_to_fetch.sort()
+
with concurrent.futures.ThreadPoolExecutor(
max_workers=self.source_config.max_threads
) as async_executor:
- self.reporter.total_explores = len(self.explores_to_fetch_set)
+ self.reporter.total_explores = len(explores_to_fetch)
explore_futures = {
async_executor.submit(self.fetch_one_explore, model, explore): (
model,
explore,
)
- for (model, explore) in self.explores_to_fetch_set
+ for (model, explore) in explores_to_fetch
}
- for future in concurrent.futures.as_completed(explore_futures):
+ for future in concurrent.futures.wait(explore_futures).done:
events, explore_id, start_time, end_time = future.result()
del explore_futures[future]
self.reporter.explores_scanned += 1
@@ -717,6 +726,17 @@ def _make_explore_metadata_events(
f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
)
+ def list_all_explores(self) -> Iterable[Tuple[str, str]]:
+ # returns a list of (model, explore) tuples
+
+ for model in self.looker_api.all_lookml_models():
+ if model.name is None or model.explores is None:
+ continue
+ for explore in model.explores:
+ if explore.name is None:
+ continue
+ yield (model.name, explore.name)
+
def fetch_one_explore(
self, model: str, explore: str
) -> Tuple[
@@ -954,7 +974,7 @@ def _input_fields_from_dashboard_element(
)
if explore is not None:
# add this to the list of explores to finally generate metadata for
- self.add_explore_to_fetch(
+ self.add_reachable_explore(
input_field.model, input_field.explore, entity_urn
)
entity_urn = explore.get_explore_urn(self.source_config)
From 7ff48b37aaea165ba3c3cb6f9f9f742ea2e37654 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 3 Nov 2023 10:23:37 -0500
Subject: [PATCH 050/792] refactor(policy): refactor policy locking, no
functional difference (#9163)
---
.../authorization/DataHubAuthorizer.java | 111 +++++++++---------
1 file changed, 55 insertions(+), 56 deletions(-)
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
index e30fb93109915..f8b28f6c182a7 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
@@ -19,6 +19,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nonnull;
@@ -55,7 +56,8 @@ public enum AuthorizationMode {
// Maps privilege name to the associated set of policies for fast access.
// Not concurrent data structure because writes are always against the entire thing.
private final Map> _policyCache = new HashMap<>(); // Shared Policy Cache.
- private final ReadWriteLock _lockPolicyCache = new ReentrantReadWriteLock();
+ private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock();
+ private final Lock readLock = readWriteLock.readLock();
private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1);
private final PolicyRefreshRunnable _policyRefreshRunnable;
@@ -74,7 +76,7 @@ public DataHubAuthorizer(
_systemAuthentication = Objects.requireNonNull(systemAuthentication);
_mode = Objects.requireNonNull(mode);
_policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient));
- _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, _lockPolicyCache);
+ _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, readWriteLock.writeLock());
_refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS);
}
@@ -93,41 +95,30 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request
Optional resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve);
- _lockPolicyCache.readLock().lock();
- try {
- // 1. Fetch the policies relevant to the requested privilege.
- final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>());
-
- // 2. Evaluate each policy.
- for (DataHubPolicyInfo policy : policiesToEvaluate) {
- if (isRequestGranted(policy, request, resolvedResourceSpec)) {
- // Short circuit if policy has granted privileges to this actor.
- return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW,
- String.format("Granted by policy with type: %s", policy.getType()));
- }
+ // 1. Fetch the policies relevant to the requested privilege.
+ final List policiesToEvaluate = getOrDefault(request.getPrivilege(), new ArrayList<>());
+
+ // 2. Evaluate each policy.
+ for (DataHubPolicyInfo policy : policiesToEvaluate) {
+ if (isRequestGranted(policy, request, resolvedResourceSpec)) {
+ // Short circuit if policy has granted privileges to this actor.
+ return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW,
+ String.format("Granted by policy with type: %s", policy.getType()));
}
- return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null);
- } finally {
- _lockPolicyCache.readLock().unlock();
}
+ return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null);
}
public List getGrantedPrivileges(final String actor, final Optional resourceSpec) {
+ // 1. Fetch all policies
+ final List policiesToEvaluate = getOrDefault(ALL, new ArrayList<>());
- _lockPolicyCache.readLock().lock();
- try {
- // 1. Fetch all policies
- final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>());
-
- Urn actorUrn = UrnUtils.getUrn(actor);
- final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor));
+ Urn actorUrn = UrnUtils.getUrn(actor);
+ final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor));
- Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
+ Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
- return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec);
- } finally {
- _lockPolicyCache.readLock().unlock();
- }
+ return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec);
}
/**
@@ -143,36 +134,31 @@ public AuthorizedActors authorizedActors(
boolean allUsers = false;
boolean allGroups = false;
- _lockPolicyCache.readLock().lock();
- try {
- // Step 1: Find policies granting the privilege.
- final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>());
-
- Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
+ // Step 1: Find policies granting the privilege.
+ final List policiesToEvaluate = getOrDefault(privilege, new ArrayList<>());
+ Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve);
- // Step 2: For each policy, determine whether the resource is a match.
- for (DataHubPolicyInfo policy : policiesToEvaluate) {
- if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) {
- // Policy is not active, skip.
- continue;
- }
+ // Step 2: For each policy, determine whether the resource is a match.
+ for (DataHubPolicyInfo policy : policiesToEvaluate) {
+ if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) {
+ // Policy is not active, skip.
+ continue;
+ }
- final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec);
+ final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec);
- // Step 3: For each matching policy, add actors that are authorized.
- authorizedUsers.addAll(matchingActors.getUsers());
- authorizedGroups.addAll(matchingActors.getGroups());
- if (matchingActors.allUsers()) {
- allUsers = true;
- }
- if (matchingActors.allGroups()) {
- allGroups = true;
- }
+ // Step 3: For each matching policy, add actors that are authorized.
+ authorizedUsers.addAll(matchingActors.getUsers());
+ authorizedGroups.addAll(matchingActors.getGroups());
+ if (matchingActors.allUsers()) {
+ allUsers = true;
+ }
+ if (matchingActors.allGroups()) {
+ allGroups = true;
}
- } finally {
- _lockPolicyCache.readLock().unlock();
}
+
// Step 4: Return all authorized users and groups.
return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, allUsers, allGroups);
}
@@ -234,6 +220,16 @@ private Optional getUrnFromRequestActor(String actor) {
}
}
+ private List getOrDefault(String key, List defaultValue) {
+ readLock.lock();
+ try {
+ return _policyCache.getOrDefault(key, defaultValue);
+ } finally {
+ // Always release the read lock acquired above
+ readLock.unlock();
+ }
+ }
+
/**
* A {@link Runnable} used to periodically fetch a new instance of the policies Cache.
*
@@ -247,7 +243,7 @@ static class PolicyRefreshRunnable implements Runnable {
private final Authentication _systemAuthentication;
private final PolicyFetcher _policyFetcher;
private final Map> _policyCache;
- private final ReadWriteLock _lockPolicyCache;
+ private final Lock writeLock;
@Override
public void run() {
@@ -274,13 +270,16 @@ public void run() {
return;
}
}
- _lockPolicyCache.writeLock().lock();
+
+ writeLock.lock();
try {
_policyCache.clear();
_policyCache.putAll(newCache);
} finally {
- _lockPolicyCache.writeLock().unlock();
+ // Always release the write lock acquired above
+ writeLock.unlock();
}
+
log.debug(String.format("Successfully fetched %s policies.", total));
} catch (Exception e) {
log.error("Caught exception while loading Policy cache. Will retry on next scheduled attempt.", e);
From 07311115c5ca436f64fad9c685cfc586cc5d4180 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Fri, 3 Nov 2023 13:00:15 -0400
Subject: [PATCH 051/792] API test for managing access token privilege (#9167)
---
.../tests/privileges/test_privileges.py | 155 ++++++++++++++----
1 file changed, 127 insertions(+), 28 deletions(-)
diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py
index 13d6b6cf3415a..740311754678e 100644
--- a/smoke-test/tests/privileges/test_privileges.py
+++ b/smoke-test/tests/privileges/test_privileges.py
@@ -52,6 +52,20 @@ def privileges_and_test_user_setup(admin_session):
wait_for_writes_to_sync()
+@tenacity.retry(
+ stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_cant_perform_action(session, json,assertion_key):
+ action_response = session.post(
+ f"{get_frontend_url()}/api/v2/graphql", json=json)
+ action_response.raise_for_status()
+ action_data = action_response.json()
+
+ assert action_data["errors"][0]["extensions"]["code"] == 403
+ assert action_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED"
+ assert action_data["data"][assertion_key] == None
+
+
@tenacity.retry(
stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
)
@@ -67,20 +81,6 @@ def _ensure_can_create_secret(session, json, urn):
assert secret_data["data"]["createSecret"] == urn
-@tenacity.retry(
- stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
-)
-def _ensure_cant_create_secret(session, json):
- create_secret_response = session.post(
- f"{get_frontend_url()}/api/v2/graphql", json=json)
- create_secret_response.raise_for_status()
- create_secret_data = create_secret_response.json()
-
- assert create_secret_data["errors"][0]["extensions"]["code"] == 403
- assert create_secret_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED"
- assert create_secret_data["data"]["createSecret"] == None
-
-
@tenacity.retry(
stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
)
@@ -99,17 +99,19 @@ def _ensure_can_create_ingestion_source(session, json):
@tenacity.retry(
- stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
+ stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
)
-def _ensure_cant_create_ingestion_source(session, json):
- create_source_response = session.post(
+def _ensure_can_create_access_token(session, json):
+ create_access_token_success = session.post(
f"{get_frontend_url()}/api/v2/graphql", json=json)
- create_source_response.raise_for_status()
- create_source_data = create_source_response.json()
+ create_access_token_success.raise_for_status()
+ ingestion_data = create_access_token_success.json()
- assert create_source_data["errors"][0]["extensions"]["code"] == 403
- assert create_source_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED"
- assert create_source_data["data"]["createIngestionSource"] == None
+ assert ingestion_data
+ assert ingestion_data["data"]
+ assert ingestion_data["data"]["createAccessToken"]
+ assert ingestion_data["data"]["createAccessToken"]["accessToken"] is not None
+ assert ingestion_data["data"]["createAccessToken"]["__typename"] == "AccessToken"
@pytest.mark.dependency(depends=["test_healthchecks"])
@@ -132,7 +134,7 @@ def test_privilege_to_create_and_manage_secrets():
}
},
}
- _ensure_cant_create_secret(user_session, create_secret)
+ _ensure_cant_perform_action(user_session, create_secret,"createSecret")
# Assign privileges to the new user to manage secrets
@@ -166,7 +168,7 @@ def test_privilege_to_create_and_manage_secrets():
remove_policy(policy_urn, admin_session)
# Ensure user can't create secret after policy is removed
- _ensure_cant_create_secret(user_session, create_secret)
+ _ensure_cant_perform_action(user_session, create_secret,"createSecret")
@pytest.mark.dependency(depends=["test_healthchecks"])
@@ -182,11 +184,18 @@ def test_privilege_to_create_and_manage_ingestion_source():
createIngestionSource(input: $input)\n}""",
"variables": {"input":{"type":"snowflake","name":"test","config":
{"recipe":
- "{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}",
+ """{\"source\":{\"type\":\"snowflake\",\"config\":{
+ \"account_id\":null,
+ \"include_table_lineage\":true,
+ \"include_view_lineage\":true,
+ \"include_tables\":true,
+ \"include_views\":true,
+ \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},
+ \"stateful_ingestion\":{\"enabled\":true}}}}""",
"executorId":"default","debugMode":False,"extraArgs":[]}}},
}
- _ensure_cant_create_ingestion_source(user_session, create_ingestion_source)
+ _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource")
# Assign privileges to the new user to manage ingestion source
@@ -201,7 +210,14 @@ def test_privilege_to_create_and_manage_ingestion_source():
updateIngestionSource(urn: $urn, input: $input)\n}""",
"variables": {"urn":ingestion_source_urn,
"input":{"type":"snowflake","name":"test updated",
- "config":{"recipe":"{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}",
+ "config":{"recipe":"""{\"source\":{\"type\":\"snowflake\",\"config\":{
+ \"account_id\":null,
+ \"include_table_lineage\":true,
+ \"include_view_lineage\":true,
+ \"include_tables\":true,
+ \"include_views\":true,
+ \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},
+ \"stateful_ingestion\":{\"enabled\":true}}}}""",
"executorId":"default","debugMode":False,"extraArgs":[]}}}
}
@@ -238,4 +254,87 @@ def test_privilege_to_create_and_manage_ingestion_source():
remove_policy(policy_urn, admin_session)
# Ensure that user can't create ingestion source after policy is removed
- _ensure_cant_create_ingestion_source(user_session, create_ingestion_source)
\ No newline at end of file
+ _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource")
+
+
+@pytest.mark.dependency(depends=["test_healthchecks"])
+def test_privilege_to_create_and_manage_access_tokens():
+
+ (admin_user, admin_pass) = get_admin_credentials()
+ admin_session = login_as(admin_user, admin_pass)
+ user_session = login_as("user", "user")
+
+
+ # Verify new user can't create access token
+ create_access_token = {
+ "query": """mutation createAccessToken($input: CreateAccessTokenInput!) {\n
+ createAccessToken(input: $input) {\n accessToken\n __typename\n }\n}\n""",
+ "variables": {"input":{"actorUrn":"urn:li:corpuser:user",
+ "type":"PERSONAL",
+ "duration":"ONE_MONTH",
+ "name":"test",
+ "description":"test"}}
+ }
+
+ _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken")
+
+
+ # Assign privileges to the new user to create and manage access tokens
+ policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_ACCESS_TOKENS"], admin_session)
+
+
+ # Verify new user can create and manage access token(create, revoke)
+ # Create an access token
+ _ensure_can_create_access_token(user_session, create_access_token)
+
+
+ # List access tokens first to get token id
+ list_access_tokens = {
+ "query": """query listAccessTokens($input: ListAccessTokenInput!) {\n
+ listAccessTokens(input: $input) {\n
+ start\n count\n total\n tokens {\n urn\n type\n
+ id\n name\n description\n actorUrn\n ownerUrn\n
+ createdAt\n expiresAt\n __typename\n }\n __typename\n }\n}\n""",
+ "variables": {
+ "input":{
+ "start":0,"count":10,"filters":[{
+ "field":"ownerUrn",
+ "values":["urn:li:corpuser:user"]}]}
+ }
+ }
+
+ list_tokens_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=list_access_tokens)
+ list_tokens_response.raise_for_status()
+ list_tokens_data = list_tokens_response.json()
+
+ assert list_tokens_data
+ assert list_tokens_data["data"]
+ assert list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] is not None
+
+ access_token_id = list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"]
+
+
+ # Revoke access token
+ revoke_access_token = {
+ "query": "mutation revokeAccessToken($tokenId: String!) {\n revokeAccessToken(tokenId: $tokenId)\n}\n",
+ "variables": {
+ "tokenId": access_token_id
+ },
+ }
+
+ revoke_token_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=revoke_access_token)
+ revoke_token_response.raise_for_status()
+ revoke_token_data = revoke_token_response.json()
+
+ assert revoke_token_data
+ assert revoke_token_data["data"]
+ assert revoke_token_data["data"]["revokeAccessToken"]
+ assert revoke_token_data["data"]["revokeAccessToken"] is True
+
+
+ # Remove the policy
+ remove_policy(policy_urn, admin_session)
+
+
+ # Ensure that user can't create access token after policy is removed
+ _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken")
\ No newline at end of file
From ddb4e1b5ffa01763d7d3353a506d4329faf11e25 Mon Sep 17 00:00:00 2001
From: Davi Arnaut
Date: Fri, 3 Nov 2023 10:26:11 -0700
Subject: [PATCH 052/792] fix(mysql-setup): quote database name (#9169)
---
docker/mysql-setup/init.sql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql
index b789329ddfd17..b6a1d47fb2a02 100644
--- a/docker/mysql-setup/init.sql
+++ b/docker/mysql-setup/init.sql
@@ -1,6 +1,6 @@
-- create datahub database
-CREATE DATABASE IF NOT EXISTS DATAHUB_DB_NAME CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-USE DATAHUB_DB_NAME;
+CREATE DATABASE IF NOT EXISTS `DATAHUB_DB_NAME` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+USE `DATAHUB_DB_NAME`;
-- create metadata aspect table
create table if not exists metadata_aspect_v2 (
From c2bc41d15eed31f89076913f641298ded5219a4f Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 3 Nov 2023 12:29:31 -0500
Subject: [PATCH 053/792] fix(health): fix health check url authentication
(#9117)
---
.../authentication/AuthenticationRequest.java | 12 ++++
.../filter/AuthenticationFilter.java | 13 ++++-
.../HealthStatusAuthenticator.java | 55 +++++++++++++++++++
.../src/main/resources/application.yml | 2 +
metadata-service/health-servlet/build.gradle | 22 --------
.../openapi/config/SpringWebConfig.java | 2 -
.../health}/HealthCheckController.java | 30 ++++++----
metadata-service/war/build.gradle | 1 -
.../webapp/WEB-INF/openapiServlet-servlet.xml | 2 +-
settings.gradle | 1 -
10 files changed, 101 insertions(+), 39 deletions(-)
create mode 100644 metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java
delete mode 100644 metadata-service/health-servlet/build.gradle
rename metadata-service/{health-servlet/src/main/java/com/datahub/health/controller => openapi-servlet/src/main/java/io/datahubproject/openapi/health}/HealthCheckController.java (79%)
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/AuthenticationRequest.java b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/AuthenticationRequest.java
index 91f15f9d5ae61..5673bac5442b2 100644
--- a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/AuthenticationRequest.java
+++ b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/AuthenticationRequest.java
@@ -1,6 +1,8 @@
package com.datahub.authentication;
import com.datahub.plugins.auth.authentication.Authenticator;
+import lombok.Getter;
+
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
@@ -13,14 +15,24 @@
* Currently, this class only hold the inbound request's headers, but could certainly be extended
* to contain additional information like the request parameters, body, ip, etc as needed.
*/
+@Getter
public class AuthenticationRequest {
private final Map caseInsensitiveHeaders;
+ private final String servletInfo;
+ private final String pathInfo;
+
public AuthenticationRequest(@Nonnull final Map requestHeaders) {
+ this("", "", requestHeaders);
+ }
+
+ public AuthenticationRequest(@Nonnull String servletInfo, @Nonnull String pathInfo, @Nonnull final Map requestHeaders) {
Objects.requireNonNull(requestHeaders);
caseInsensitiveHeaders = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
caseInsensitiveHeaders.putAll(requestHeaders);
+ this.servletInfo = servletInfo;
+ this.pathInfo = pathInfo;
}
/**
diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java
index e15918a813158..8c7b3ac8b98f0 100644
--- a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java
+++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java
@@ -2,6 +2,7 @@
import com.datahub.authentication.authenticator.AuthenticatorChain;
import com.datahub.authentication.authenticator.DataHubSystemAuthenticator;
+import com.datahub.authentication.authenticator.HealthStatusAuthenticator;
import com.datahub.authentication.authenticator.NoOpAuthenticator;
import com.datahub.authentication.token.StatefulTokenService;
import com.datahub.plugins.PluginConstant;
@@ -29,6 +30,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -148,7 +150,7 @@ private void buildAuthenticatorChain() {
}
private AuthenticationRequest buildAuthContext(HttpServletRequest request) {
- return new AuthenticationRequest(Collections.list(request.getHeaderNames())
+ return new AuthenticationRequest(request.getServletPath(), request.getPathInfo(), Collections.list(request.getHeaderNames())
.stream()
.collect(Collectors.toMap(headerName -> headerName, request::getHeader)));
}
@@ -242,7 +244,14 @@ private void registerNativeAuthenticator(AuthenticatorChain authenticatorChain,
final Authenticator authenticator = clazz.newInstance();
// Successfully created authenticator. Now init and register it.
log.debug(String.format("Initializing Authenticator with name %s", type));
- authenticator.init(configs, authenticatorContext);
+ if (authenticator instanceof HealthStatusAuthenticator) {
+ Map authenticatorConfig = new HashMap<>(Map.of(SYSTEM_CLIENT_ID_CONFIG,
+ this.configurationProvider.getAuthentication().getSystemClientId()));
+ authenticatorConfig.putAll(Optional.ofNullable(internalAuthenticatorConfig.getConfigs()).orElse(Collections.emptyMap()));
+ authenticator.init(authenticatorConfig, authenticatorContext);
+ } else {
+ authenticator.init(configs, authenticatorContext);
+ }
log.info(String.format("Registering Authenticator with name %s", type));
authenticatorChain.register(authenticator);
} catch (Exception e) {
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java
new file mode 100644
index 0000000000000..5749eacf5d25d
--- /dev/null
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java
@@ -0,0 +1,55 @@
+package com.datahub.authentication.authenticator;
+
+import com.datahub.authentication.Actor;
+import com.datahub.authentication.ActorType;
+import com.datahub.authentication.Authentication;
+import com.datahub.authentication.AuthenticationException;
+import com.datahub.authentication.AuthenticationRequest;
+import com.datahub.authentication.AuthenticatorContext;
+import com.datahub.plugins.auth.authentication.Authenticator;
+import lombok.extern.slf4j.Slf4j;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+import static com.datahub.authentication.AuthenticationConstants.SYSTEM_CLIENT_ID_CONFIG;
+
+
+/**
+ * This Authenticator allows unauthenticated access to the health check endpoints.
+ *
+ * It exists to support load balancers and liveness/readiness checks.
+ *
+ */
+@Slf4j
+public class HealthStatusAuthenticator implements Authenticator {
+ private static final Set HEALTH_ENDPOINTS = Set.of(
+ "/openapi/check/",
+ "/openapi/up/"
+ );
+ private String systemClientId;
+
+ @Override
+ public void init(@Nonnull final Map config, @Nullable final AuthenticatorContext context) {
+ Objects.requireNonNull(config, "Config parameter cannot be null");
+ this.systemClientId = Objects.requireNonNull((String) config.get(SYSTEM_CLIENT_ID_CONFIG),
+ String.format("Missing required config %s", SYSTEM_CLIENT_ID_CONFIG));
+ }
+
+ @Override
+ public Authentication authenticate(@Nonnull AuthenticationRequest context) throws AuthenticationException {
+ Objects.requireNonNull(context);
+ if (HEALTH_ENDPOINTS.stream().anyMatch(prefix -> String.join("", context.getServletInfo(), context.getPathInfo()).startsWith(prefix))) {
+ return new Authentication(
+ new Actor(ActorType.USER, systemClientId),
+ "",
+ Collections.emptyMap()
+ );
+ }
+ throw new AuthenticationException("Authorization not allowed. Non-health check endpoint.");
+ }
+}
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index b817208672e08..91b10a75c922e 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -11,6 +11,8 @@ authentication:
# Key used to validate incoming tokens. Should typically be the same as authentication.tokenService.signingKey
signingKey: ${DATAHUB_TOKEN_SERVICE_SIGNING_KEY:WnEdIeTG/VVCLQqGwC/BAkqyY0k+H8NEAtWGejrBI94=}
salt: ${DATAHUB_TOKEN_SERVICE_SALT:ohDVbJBvHHVJh9S/UA4BYF9COuNnqqVhr9MLKEGXk1O=}
+ # Required for unauthenticated health check endpoints - best not to remove.
+ - type: com.datahub.authentication.authenticator.HealthStatusAuthenticator
# Normally failures are only warnings, enable this to throw them.
logAuthenticatorExceptions: ${METADATA_SERVICE_AUTHENTICATOR_EXCEPTIONS_ENABLED:false}
diff --git a/metadata-service/health-servlet/build.gradle b/metadata-service/health-servlet/build.gradle
deleted file mode 100644
index 6095f724b3cd4..0000000000000
--- a/metadata-service/health-servlet/build.gradle
+++ /dev/null
@@ -1,22 +0,0 @@
-apply plugin: 'java'
-
-dependencies {
-
- implementation project(':metadata-service:factories')
-
- implementation externalDependency.guava
- implementation externalDependency.reflections
- implementation externalDependency.springBoot
- implementation externalDependency.springCore
- implementation externalDependency.springDocUI
- implementation externalDependency.springWeb
- implementation externalDependency.springWebMVC
- implementation externalDependency.springBeans
- implementation externalDependency.springContext
- implementation externalDependency.slf4jApi
- compileOnly externalDependency.lombok
- implementation externalDependency.antlr4Runtime
- implementation externalDependency.antlr4
-
- annotationProcessor externalDependency.lombok
-}
\ No newline at end of file
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
index 71e8c79a2275a..e4f49df90c392 100644
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java
@@ -44,7 +44,6 @@ public GroupedOpenApi defaultOpenApiGroup() {
.group("default")
.packagesToExclude(
"io.datahubproject.openapi.operations",
- "com.datahub.health",
"io.datahubproject.openapi.health"
).build();
}
@@ -55,7 +54,6 @@ public GroupedOpenApi operationsOpenApiGroup() {
.group("operations")
.packagesToScan(
"io.datahubproject.openapi.operations",
- "com.datahub.health",
"io.datahubproject.openapi.health"
).build();
}
diff --git a/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthCheckController.java
similarity index 79%
rename from metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java
rename to metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthCheckController.java
index c200e63e0d497..c90603bf88c31 100644
--- a/metadata-service/health-servlet/src/main/java/com/datahub/health/controller/HealthCheckController.java
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/health/HealthCheckController.java
@@ -1,5 +1,6 @@
-package com.datahub.health.controller;
+package io.datahubproject.openapi.health;
+import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import io.swagger.v3.oas.annotations.tags.Tag;
@@ -9,7 +10,6 @@
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
-import java.util.function.Supplier;
import org.opensearch.action.admin.cluster.health.ClusterHealthRequest;
import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
@@ -27,7 +27,7 @@
@RestController
-@RequestMapping("/check")
+@RequestMapping("/")
@Tag(name = "HealthCheck", description = "An API for checking health of GMS and its clients.")
public class HealthCheckController {
@Autowired
@@ -41,6 +41,12 @@ public HealthCheckController(ConfigurationProvider config) {
this::getElasticHealth, config.getHealthCheck().getCacheDurationSeconds(), TimeUnit.SECONDS);
}
+ @GetMapping(path = "/check/ready", produces = MediaType.APPLICATION_JSON_VALUE)
+ public ResponseEntity getCombinedHealthCheck(String... checks) {
+ return ResponseEntity.status(getCombinedDebug(checks).getStatusCode())
+ .body(getCombinedDebug(checks).getStatusCode().is2xxSuccessful());
+ }
+
/**
* Combined health check endpoint for checking GMS clients.
* For now, just checks the health of the ElasticSearch client
@@ -48,11 +54,10 @@ public HealthCheckController(ConfigurationProvider config) {
* that component). The status code will be 200 if all components are okay, and 500 if one or more components are not
* healthy.
*/
- @GetMapping(path = "/ready", produces = MediaType.APPLICATION_JSON_VALUE)
- public ResponseEntity>> getCombinedHealthCheck(String... checks) {
-
+ @GetMapping(path = "/debug/ready", produces = MediaType.APPLICATION_JSON_VALUE)
+ public ResponseEntity>> getCombinedDebug(String... checks) {
Map>> healthChecks = new HashMap<>();
- healthChecks.put("elasticsearch", this::getElasticHealthWithCache);
+ healthChecks.put("elasticsearch", this::getElasticDebugWithCache);
// Add new components here
List componentsToCheck = checks != null && checks.length > 0
@@ -67,7 +72,6 @@ public ResponseEntity>> getCombinedHealthChec
.get());
}
-
boolean isHealthy = componentHealth.values().stream().allMatch(resp -> resp.getStatusCode() == HttpStatus.OK);
if (isHealthy) {
return ResponseEntity.ok(componentHealth);
@@ -75,12 +79,18 @@ public ResponseEntity>> getCombinedHealthChec
return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body(componentHealth);
}
+ @GetMapping(path = "/check/elastic", produces = MediaType.APPLICATION_JSON_VALUE)
+ public ResponseEntity getElasticHealthWithCache() {
+ return ResponseEntity.status(getElasticDebugWithCache().getStatusCode())
+ .body(getElasticDebugWithCache().getStatusCode().is2xxSuccessful());
+ }
+
/**
* Checks the memoized cache for the latest elastic health check result
* @return The ResponseEntity containing the health check result
*/
- @GetMapping(path = "/elastic", produces = MediaType.APPLICATION_JSON_VALUE)
- public ResponseEntity getElasticHealthWithCache() {
+ @GetMapping(path = "/debug/elastic", produces = MediaType.APPLICATION_JSON_VALUE)
+ public ResponseEntity getElasticDebugWithCache() {
return this.memoizedSupplier.get();
}
diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle
index 122c2b9d5357b..54e95fdcfe579 100644
--- a/metadata-service/war/build.gradle
+++ b/metadata-service/war/build.gradle
@@ -17,7 +17,6 @@ dependencies {
runtimeOnly project(':metadata-service:servlet')
runtimeOnly project(':metadata-service:auth-servlet-impl')
runtimeOnly project(':metadata-service:graphql-servlet-impl')
- runtimeOnly project(':metadata-service:health-servlet')
runtimeOnly project(':metadata-service:openapi-servlet')
runtimeOnly project(':metadata-service:openapi-entity-servlet')
runtimeOnly project(':metadata-service:openapi-analytics-servlet')
diff --git a/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml b/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
index 3077cfb062638..fb2bc6c0336cd 100644
--- a/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
+++ b/metadata-service/war/src/main/webapp/WEB-INF/openapiServlet-servlet.xml
@@ -3,7 +3,7 @@
xmlns:context="http://www.springframework.org/schema/context"
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd">
-
+
diff --git a/settings.gradle b/settings.gradle
index 52de461383b5e..d2844fe00cdbc 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -8,7 +8,6 @@ include 'metadata-service:auth-config'
include 'metadata-service:auth-impl'
include 'metadata-service:auth-filter'
include 'metadata-service:auth-servlet-impl'
-include 'metadata-service:health-servlet'
include 'metadata-service:restli-api'
include 'metadata-service:restli-client'
include 'metadata-service:restli-servlet-impl'
From efd73a5f5766872ebd4997bbb261d2f95d295dd6 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Fri, 3 Nov 2023 16:19:39 -0500
Subject: [PATCH 054/792] fix(elasticsearch): fix elasticsearch-setup for
dropped 000001 index (#9074)
---
docker/elasticsearch-setup/create-indices.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/elasticsearch-setup/create-indices.sh b/docker/elasticsearch-setup/create-indices.sh
index 343013402394f..5c4eb3ce3851e 100755
--- a/docker/elasticsearch-setup/create-indices.sh
+++ b/docker/elasticsearch-setup/create-indices.sh
@@ -129,7 +129,7 @@ function create_datahub_usage_event_aws_elasticsearch() {
if [ $USAGE_EVENT_STATUS -eq 200 ]; then
USAGE_EVENT_DEFINITION=$(curl "${CURL_ARGS[@]}" "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event")
# the definition is expected to contain "datahub_usage_event-000001" string
- if [[ $USAGE_EVENT_DEFINITION != *"datahub_usage_event-$INDEX_SUFFIX"* ]]; then
+ if [[ $USAGE_EVENT_DEFINITION != *"datahub_usage_event-"* ]]; then
# ... if it doesn't, we need to drop it
echo -e "\n>>> deleting invalid datahub_usage_event ..."
curl "${CURL_ARGS[@]}" -XDELETE "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event"
From ac1a5a6d184d7991d0006bfe33d31b4471b64729 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Sat, 4 Nov 2023 15:06:09 -0400
Subject: [PATCH 055/792] fix(tests): Origin/fix flaky glossary navigation
cypress test (#9175)
---
.../tests/cypress/cypress/e2e/glossary/glossary_navigation.js | 4 ++--
.../tests/cypress/cypress/e2e/lineage/impact_analysis.js | 4 ++++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
index aeceaf99be889..c6e9d93f71b8c 100644
--- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
+++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
@@ -27,7 +27,7 @@ describe("glossary sidebar navigation test", () => {
cy.waitTextVisible("Moved Glossary Term!");
// Ensure the new term is under the parent term group in the navigation sidebar
- cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).click();
+ cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTermGroup).click().wait(3000);
cy.get('*[class^="GlossaryEntitiesList"]').contains(glossaryTerm).should("be.visible");
// Move a term group from the root level to be under a parent term group
@@ -41,7 +41,7 @@ describe("glossary sidebar navigation test", () => {
cy.waitTextVisible("Moved Term Group!");
// Ensure it is no longer on the sidebar navigator at the top level but shows up under the new parent
- cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryParentGroup).click();
+ cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryParentGroup).click().wait(3000);
cy.get('*[class^="GlossaryEntitiesList"]').contains(glossaryTermGroup).should("be.visible");
// Delete a term group
diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js b/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js
index defb786d1fa5d..784ccf8f0f87d 100644
--- a/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js
+++ b/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js
@@ -21,6 +21,10 @@ const startAtDataSetLineage = () => {
}
describe("impact analysis", () => {
+ beforeEach(() => {
+ cy.on('uncaught:exception', (err, runnable) => { return false; });
+ });
+
it("can see 1 hop of lineage by default", () => {
startAtDataSetLineage()
From 60131a85438efc3c5d75fe6d4ed4cff634792325 Mon Sep 17 00:00:00 2001
From: Alex Waldron <51122673+walter9388@users.noreply.github.com>
Date: Sat, 4 Nov 2023 19:07:00 +0000
Subject: [PATCH 056/792] fix: bad lineage link in
`LineageGraphOnboardingConfig.tsx` (#9162)
---
.../src/app/onboarding/config/LineageGraphOnboardingConfig.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/datahub-web-react/src/app/onboarding/config/LineageGraphOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/LineageGraphOnboardingConfig.tsx
index 54bae6978a4a9..89a01ab3bd241 100644
--- a/datahub-web-react/src/app/onboarding/config/LineageGraphOnboardingConfig.tsx
+++ b/datahub-web-react/src/app/onboarding/config/LineageGraphOnboardingConfig.tsx
@@ -23,7 +23,7 @@ export const LineageGraphOnboardingConfig: OnboardingStep[] = [
here.
From 7cfe3c79794e2c5660c405cdb447086ed32b52f4 Mon Sep 17 00:00:00 2001
From: Sumit Patil <91715217+sumitappt@users.noreply.github.com>
Date: Sun, 5 Nov 2023 01:02:23 +0530
Subject: [PATCH 057/792] =?UTF-8?q?OBS-191=20|=20Viewing=20domains=20page?=
=?UTF-8?q?=20should=20not=20require=20Manage=20Domains=20priv=E2=80=A6=20?=
=?UTF-8?q?(#9156)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
datahub-web-react/src/app/shared/admin/HeaderLinks.tsx | 3 ---
1 file changed, 3 deletions(-)
diff --git a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
index ce1ad93565ba4..3f46f35889fd1 100644
--- a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
+++ b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
@@ -73,7 +73,6 @@ export function HeaderLinks(props: Props) {
const showSettings = true;
const showIngestion =
isIngestionEnabled && me && me.platformPrivileges?.manageIngestion && me.platformPrivileges?.manageSecrets;
- const showDomains = me?.platformPrivileges?.createDomains || me?.platformPrivileges?.manageDomains;
useUpdateEducationStepIdsAllowlist(!!showIngestion, HOME_PAGE_INGESTION_ID);
@@ -106,7 +105,6 @@ export function HeaderLinks(props: Props) {
View and modify your data dictionary
- {showDomains && (
@@ -121,7 +119,6 @@ export function HeaderLinks(props: Props) {
Manage related groups of data assets
- )}
}
>
From 81daae815af4498192f487418941379b2170762c Mon Sep 17 00:00:00 2001
From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com>
Date: Sun, 5 Nov 2023 01:16:39 +0530
Subject: [PATCH 058/792] fix: expand the stats row in search preview cards
(#9140)
---
.../app/entity/dashboard/shared/DashboardStatsSummary.tsx | 3 +++
.../src/app/entity/dataset/shared/DatasetStatsSummary.tsx | 3 +++
.../src/app/entity/dataset/shared/ExpandingStat.tsx | 5 ++---
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/datahub-web-react/src/app/entity/dashboard/shared/DashboardStatsSummary.tsx b/datahub-web-react/src/app/entity/dashboard/shared/DashboardStatsSummary.tsx
index e8fb4c16aca9c..fb6364cffac8b 100644
--- a/datahub-web-react/src/app/entity/dashboard/shared/DashboardStatsSummary.tsx
+++ b/datahub-web-react/src/app/entity/dashboard/shared/DashboardStatsSummary.tsx
@@ -11,6 +11,9 @@ import ExpandingStat from '../../dataset/shared/ExpandingStat';
const StatText = styled.span`
color: ${ANTD_GRAY[8]};
+ @media (min-width: 1024px) {
+ width: 100%;
+ white-space: nowrap;
`;
const HelpIcon = styled(QuestionCircleOutlined)`
diff --git a/datahub-web-react/src/app/entity/dataset/shared/DatasetStatsSummary.tsx b/datahub-web-react/src/app/entity/dataset/shared/DatasetStatsSummary.tsx
index 14f550de25be7..3dcd41a3f8a41 100644
--- a/datahub-web-react/src/app/entity/dataset/shared/DatasetStatsSummary.tsx
+++ b/datahub-web-react/src/app/entity/dataset/shared/DatasetStatsSummary.tsx
@@ -12,6 +12,9 @@ import ExpandingStat from './ExpandingStat';
const StatText = styled.span<{ color: string }>`
color: ${(props) => props.color};
+ @media (min-width: 1160px) {
+ width: 100%;
+ white-space: nowrap;
`;
const PopoverContent = styled.div`
diff --git a/datahub-web-react/src/app/entity/dataset/shared/ExpandingStat.tsx b/datahub-web-react/src/app/entity/dataset/shared/ExpandingStat.tsx
index 8101a696bf274..4e223b6e54058 100644
--- a/datahub-web-react/src/app/entity/dataset/shared/ExpandingStat.tsx
+++ b/datahub-web-react/src/app/entity/dataset/shared/ExpandingStat.tsx
@@ -2,9 +2,7 @@ import React, { ReactNode, useEffect, useRef, useState } from 'react';
import styled from 'styled-components';
const ExpandingStatContainer = styled.span<{ disabled: boolean; expanded: boolean; width: string }>`
- overflow: hidden;
- white-space: nowrap;
- width: ${(props) => props.width};
+ max-width: 100%;
transition: width 250ms ease;
`;
@@ -13,6 +11,7 @@ const ExpandingStat = ({
render,
}: {
disabled?: boolean;
+
render: (isExpanded: boolean) => ReactNode;
}) => {
const contentRef = useRef(null);
From 02156662b5e7f24f3db908d4d19f8d1bb94a32b5 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 6 Nov 2023 12:47:07 -0800
Subject: [PATCH 059/792] docs(ingest): clarify adding source guide (#9161)
---
metadata-ingestion/adding-source.md | 32 ++++++++++++++++-------------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md
index a0930102c6827..6baddf6b2010d 100644
--- a/metadata-ingestion/adding-source.md
+++ b/metadata-ingestion/adding-source.md
@@ -6,7 +6,7 @@ There are two ways of adding a metadata ingestion source.
2. You are writing the custom source for yourself and are not going to contribute back (yet).
If you are going for case (1) just follow the steps 1 to 9 below. In case you are building it for yourself you can skip
-steps 4-9 (but maybe write tests and docs for yourself as well) and follow the documentation
+steps 4-8 (but maybe write tests and docs for yourself as well) and follow the documentation
on [how to use custom ingestion sources](../docs/how/add-custom-ingestion-source.md)
without forking Datahub.
@@ -27,6 +27,7 @@ from `ConfigModel`. The [file source](./src/datahub/ingestion/source/file.py) is
We use [pydantic](https://pydantic-docs.helpmanual.io) conventions for documenting configuration flags. Use the `description` attribute to write rich documentation for your configuration field.
For example, the following code:
+
```python
from pydantic import Field
from datahub.api.configuration.common import ConfigModel
@@ -49,12 +50,10 @@ generates the following documentation:
-
:::note
Inline markdown or code snippets are not yet supported for field level documentation.
:::
-
### 2. Set up the reporter
The reporter interface enables the source to report statistics, warnings, failures, and other information about the run.
@@ -71,6 +70,8 @@ some [convenience methods](./src/datahub/emitter/mce_builder.py) for commonly us
### 4. Set up the dependencies
+Note: Steps 4-8 are only required if you intend to contribute the source back to the DataHub project.
+
Declare the source's pip dependencies in the `plugins` variable of the [setup script](./setup.py).
### 5. Enable discoverability
@@ -119,37 +120,38 @@ from datahub.ingestion.api.decorators import (
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
class FileSource(Source):
"""
-
- The File Source can be used to produce all kinds of metadata from a generic metadata events file.
+
+ The File Source can be used to produce all kinds of metadata from a generic metadata events file.
:::note
Events in this file can be in MCE form or MCP form.
:::
-
+
"""
... source code goes here
```
-
#### 7.2 Write custom documentation
-- Create a copy of [`source-docs-template.md`](./source-docs-template.md) and edit all relevant components.
+- Create a copy of [`source-docs-template.md`](./source-docs-template.md) and edit all relevant components.
- Name the document as `` and move it to `metadata-ingestion/docs/sources//.md`. For example for the Kafka platform, under the `kafka` plugin, move the document to `metadata-ingestion/docs/sources/kafka/kafka.md`.
- Add a quickstart recipe corresponding to the plugin under `metadata-ingestion/docs/sources//_recipe.yml`. For example, for the Kafka platform, under the `kafka` plugin, there is a quickstart recipe located at `metadata-ingestion/docs/sources/kafka/kafka_recipe.yml`.
- To write platform-specific documentation (that is cross-plugin), write the documentation under `metadata-ingestion/docs/sources//README.md`. For example, cross-plugin documentation for the BigQuery platform is located under `metadata-ingestion/docs/sources/bigquery/README.md`.
#### 7.3 Viewing the Documentation
-Documentation for the source can be viewed by running the documentation generator from the `docs-website` module.
+Documentation for the source can be viewed by running the documentation generator from the `docs-website` module.
##### Step 1: Build the Ingestion docs
+
```console
# From the root of DataHub repo
./gradlew :metadata-ingestion:docGen
```
If this finishes successfully, you will see output messages like:
+
```console
Ingestion Documentation Generation Complete
############################################
@@ -170,7 +172,8 @@ Ingestion Documentation Generation Complete
You can also find documentation files generated at `./docs/generated/ingestion/sources` relative to the root of the DataHub repo. You should be able to locate your specific source's markdown file here and investigate it to make sure things look as expected.
#### Step 2: Build the Entire Documentation
-To view how this documentation looks in the browser, there is one more step. Just build the entire docusaurus page from the `docs-website` module.
+
+To view how this documentation looks in the browser, there is one more step. Just build the entire docusaurus page from the `docs-website` module.
```console
# From the root of DataHub repo
@@ -178,6 +181,7 @@ To view how this documentation looks in the browser, there is one more step. Jus
```
This will generate messages like:
+
```console
...
> Task :docs-website:yarnGenerate
@@ -219,15 +223,15 @@ BUILD SUCCESSFUL in 35s
36 actionable tasks: 16 executed, 20 up-to-date
```
-After this you need to run the following script from the `docs-website` module.
+After this you need to run the following script from the `docs-website` module.
+
```console
cd docs-website
npm run serve
```
-Now, browse to http://localhost:3000 or whichever port npm is running on, to browse the docs.
-Your source should show up on the left sidebar under `Metadata Ingestion / Sources`.
-
+Now, browse to http://localhost:3000 or whichever port npm is running on, to browse the docs.
+Your source should show up on the left sidebar under `Metadata Ingestion / Sources`.
### 8. Add SQL Alchemy mapping (if applicable)
From 4a4c29030c0cfd2da9eab01798bc74a94fbb8c1d Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 6 Nov 2023 12:47:24 -0800
Subject: [PATCH 060/792] chore: stop ingestion-smoke CI errors on forks
(#9160)
---
.github/workflows/docker-ingestion-smoke.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/docker-ingestion-smoke.yml b/.github/workflows/docker-ingestion-smoke.yml
index 8d52c23792857..82b57d23609a5 100644
--- a/.github/workflows/docker-ingestion-smoke.yml
+++ b/.github/workflows/docker-ingestion-smoke.yml
@@ -47,6 +47,7 @@ jobs:
name: Build and Push Docker Image to Docker Hub
runs-on: ubuntu-latest
needs: setup
+ if: ${{ needs.setup.outputs.publish == 'true' }}
steps:
- name: Check out the repo
uses: actions/checkout@v3
From 86d2b08d2bbecc90e9adffd250c894abe54667e7 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Mon, 6 Nov 2023 12:58:07 -0800
Subject: [PATCH 061/792] docs(ingest): inherit capabilities from superclasses
(#9174)
---
metadata-ingestion-modules/airflow-plugin/setup.py | 4 ++++
.../src/datahub/ingestion/api/decorators.py | 12 +++++++++++-
.../source/state/stateful_ingestion_base.py | 8 +++++++-
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py
index a5af881022d8c..e88fc870cb333 100644
--- a/metadata-ingestion-modules/airflow-plugin/setup.py
+++ b/metadata-ingestion-modules/airflow-plugin/setup.py
@@ -101,6 +101,10 @@ def get_long_description():
f"acryl-datahub[testing-utils]{_self_pin}",
# Extra requirements for loading our test dags.
"apache-airflow[snowflake]>=2.0.2",
+ # Connexion's new version breaks Airflow:
+ # See https://github.com/apache/airflow/issues/35234.
+ # TODO: We should transition to using Airflow's constraints file.
+ "connexion<3",
# https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350
# Eventually we want to set this to "snowflake-sqlalchemy>=1.4.3".
# However, that doesn't work with older versions of Airflow. Instead
diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
index 5e4427047104f..b390ffb9dd036 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py
@@ -93,10 +93,20 @@ def capability(
"""
def wrapper(cls: Type) -> Type:
- if not hasattr(cls, "__capabilities"):
+ if not hasattr(cls, "__capabilities") or any(
+ # The dict is inherited from a superclass rather than defined on this class.
+ cls.__capabilities is getattr(base, "__capabilities", None)
+ for base in cls.__bases__
+ ):
cls.__capabilities = {}
cls.get_capabilities = lambda: cls.__capabilities.values()
+ # If the superclasses have capability annotations, copy those over.
+ for base in cls.__bases__:
+ base_caps = getattr(base, "__capabilities", None)
+ if base_caps:
+ cls.__capabilities.update(base_caps)
+
cls.__capabilities[capability_name] = CapabilitySetting(
capability=capability_name, description=description, supported=supported
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
index 7fb2cf9813cab..d11b1f9ad6a53 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py
@@ -15,11 +15,12 @@
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import capability
from datahub.ingestion.api.ingestion_job_checkpointing_provider_base import (
IngestionCheckpointingProviderBase,
JobId,
)
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import Source, SourceCapability, SourceReport
from datahub.ingestion.source.state.checkpoint import Checkpoint, StateType
from datahub.ingestion.source.state.use_case_handler import (
StatefulIngestionUsecaseHandlerBase,
@@ -177,6 +178,11 @@ class StatefulIngestionReport(SourceReport):
pass
+@capability(
+ SourceCapability.DELETION_DETECTION,
+ "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
+ supported=True,
+)
class StatefulIngestionSourceBase(Source):
"""
Defines the base class for all stateful sources.
From 2c58c63780970606e50ba95b382dc9ffbde17bfc Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz
Date: Mon, 6 Nov 2023 15:58:57 -0500
Subject: [PATCH 062/792] fix(ingest/datahub-source): Order by version in
memory (#9185)
---
.../source/datahub/datahub_database_reader.py | 100 ++++++++++++++----
.../tests/unit/test_datahub_source.py | 51 +++++++++
2 files changed, 133 insertions(+), 18 deletions(-)
create mode 100644 metadata-ingestion/tests/unit/test_datahub_source.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
index 96184d8d445e4..e4f1bb275487e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py
@@ -1,9 +1,11 @@
import json
import logging
from datetime import datetime
-from typing import Dict, Iterable, Optional, Tuple
+from typing import Any, Generic, Iterable, List, Optional, Tuple, TypeVar
from sqlalchemy import create_engine
+from sqlalchemy.engine import Row
+from typing_extensions import Protocol
from datahub.emitter.aspect import ASPECT_MAP
from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -20,6 +22,62 @@
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+class VersionOrderable(Protocol):
+ createdon: Any # Should restrict to only orderable types
+ version: int
+
+
+ROW = TypeVar("ROW", bound=VersionOrderable)
+
+
+class VersionOrderer(Generic[ROW]):
+ """Orders rows by (createdon, version == 0).
+
+ That is, orders rows first by createdon, and for equal timestamps, puts version 0 rows last.
+ """
+
+ def __init__(self, enabled: bool):
+ # Buffers all version 0 rows seen so far for a single createdon timestamp.
+ # Once a row with a later createdon arrives (or the input ends), the buffered version 0 rows are emitted.
+ # This guarantees that, for a given timestamp, version 0 aspects are always ingested last.
+ self.queue: Optional[Tuple[datetime, List[ROW]]] = None
+ self.enabled = enabled
+
+ def __call__(self, rows: Iterable[ROW]) -> Iterable[ROW]:
+ for row in rows:
+ yield from self._process_row(row)
+ yield from self._flush_queue()
+
+ def _process_row(self, row: ROW) -> Iterable[ROW]:
+ if not self.enabled:
+ yield row
+ return
+
+ yield from self._attempt_queue_flush(row)
+ if row.version == 0:
+ self._add_to_queue(row)
+ else:
+ yield row
+
+ def _add_to_queue(self, row: ROW) -> None:
+ if self.queue is None:
+ self.queue = (row.createdon, [row])
+ else:
+ self.queue[1].append(row)
+
+ def _attempt_queue_flush(self, row: ROW) -> Iterable[ROW]:
+ if self.queue is None:
+ return
+
+ if row.createdon > self.queue[0]:
+ yield from self._flush_queue()
+
+ def _flush_queue(self) -> Iterable[ROW]:
+ if self.queue is not None:
+ yield from self.queue[1]
+ self.queue = None
+
+
class DataHubDatabaseReader:
def __init__(
self,
@@ -40,13 +98,14 @@ def query(self) -> str:
# Offset is generally 0, unless we repeat the same createdon twice
# Ensures stable order, chronological per (urn, aspect)
- # Version 0 last, only when createdon is the same. Otherwise relies on createdon order
+ # Relies on createdon order to reflect version order
+ # Ordering of entries with the same createdon is handled by VersionOrderer
return f"""
- SELECT urn, aspect, metadata, systemmetadata, createdon
+ SELECT urn, aspect, metadata, systemmetadata, createdon, version
FROM {self.engine.dialect.identifier_preparer.quote(self.config.database_table_name)}
WHERE createdon >= %(since_createdon)s
{"" if self.config.include_all_versions else "AND version = 0"}
- ORDER BY createdon, urn, aspect, CASE WHEN version = 0 THEN 1 ELSE 0 END, version
+ ORDER BY createdon, urn, aspect, version
LIMIT %(limit)s
OFFSET %(offset)s
"""
@@ -54,6 +113,14 @@ def query(self) -> str:
def get_aspects(
self, from_createdon: datetime, stop_time: datetime
) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]:
+ orderer = VersionOrderer[Row](enabled=self.config.include_all_versions)
+ rows = self._get_rows(from_createdon=from_createdon, stop_time=stop_time)
+ for row in orderer(rows):
+ mcp = self._parse_row(row)
+ if mcp:
+ yield mcp, row.createdon
+
+ def _get_rows(self, from_createdon: datetime, stop_time: datetime) -> Iterable[Row]:
with self.engine.connect() as conn:
ts = from_createdon
offset = 0
@@ -69,34 +136,31 @@ def get_aspects(
return
for i, row in enumerate(rows):
- row_dict = row._asdict()
- mcp = self._parse_row(row_dict)
- if mcp:
- yield mcp, row_dict["createdon"]
+ yield row
- if ts == row_dict["createdon"]:
- offset += i
+ if ts == row.createdon:
+ offset += i + 1
else:
- ts = row_dict["createdon"]
+ ts = row.createdon
offset = 0
- def _parse_row(self, d: Dict) -> Optional[MetadataChangeProposalWrapper]:
+ def _parse_row(self, row: Row) -> Optional[MetadataChangeProposalWrapper]:
try:
- json_aspect = post_json_transform(json.loads(d["metadata"]))
- json_metadata = post_json_transform(json.loads(d["systemmetadata"] or "{}"))
+ json_aspect = post_json_transform(json.loads(row.metadata))
+ json_metadata = post_json_transform(json.loads(row.systemmetadata or "{}"))
system_metadata = SystemMetadataClass.from_obj(json_metadata)
return MetadataChangeProposalWrapper(
- entityUrn=d["urn"],
- aspect=ASPECT_MAP[d["aspect"]].from_obj(json_aspect),
+ entityUrn=row.urn,
+ aspect=ASPECT_MAP[row.aspect].from_obj(json_aspect),
systemMetadata=system_metadata,
changeType=ChangeTypeClass.UPSERT,
)
except Exception as e:
logger.warning(
- f"Failed to parse metadata for {d['urn']}: {e}", exc_info=True
+ f"Failed to parse metadata for {row.urn}: {e}", exc_info=True
)
self.report.num_database_parse_errors += 1
self.report.database_parse_errors.setdefault(
str(e), LossyDict()
- ).setdefault(d["aspect"], LossyList()).append(d["urn"])
+ ).setdefault(row.aspect, LossyList()).append(row.urn)
return None
diff --git a/metadata-ingestion/tests/unit/test_datahub_source.py b/metadata-ingestion/tests/unit/test_datahub_source.py
new file mode 100644
index 0000000000000..adc131362b326
--- /dev/null
+++ b/metadata-ingestion/tests/unit/test_datahub_source.py
@@ -0,0 +1,51 @@
+from dataclasses import dataclass
+
+import pytest
+
+from datahub.ingestion.source.datahub.datahub_database_reader import (
+ VersionOrderable,
+ VersionOrderer,
+)
+
+
+@dataclass
+class MockRow(VersionOrderable):
+ createdon: int
+ version: int
+ urn: str
+
+
+@pytest.fixture
+def rows():
+ return [
+ MockRow(0, 0, "one"),
+ MockRow(0, 1, "one"),
+ MockRow(0, 0, "two"),
+ MockRow(0, 0, "three"),
+ MockRow(0, 1, "three"),
+ MockRow(0, 2, "three"),
+ MockRow(0, 1, "two"),
+ MockRow(0, 4, "three"),
+ MockRow(0, 5, "three"),
+ MockRow(1, 6, "three"),
+ MockRow(1, 0, "four"),
+ MockRow(2, 0, "five"),
+ MockRow(2, 1, "six"),
+ MockRow(2, 0, "six"),
+ MockRow(3, 0, "seven"),
+ MockRow(3, 0, "eight"),
+ ]
+
+
+def test_version_orderer(rows):
+ orderer = VersionOrderer[MockRow](enabled=True)
+ ordered_rows = list(orderer(rows))
+ assert ordered_rows == sorted(
+ ordered_rows, key=lambda x: (x.createdon, x.version == 0)
+ )
+
+
+def test_version_orderer_disabled(rows):
+ orderer = VersionOrderer[MockRow](enabled=False)
+ ordered_rows = list(orderer(rows))
+ assert ordered_rows == rows
From f2ce3ab62cc29bd0d4d4cade2577a50a39fa0f32 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Mon, 6 Nov 2023 15:19:55 -0600
Subject: [PATCH 063/792] lint(frontend): fix HeaderLinks lint error (#9189)
---
.../src/app/shared/admin/HeaderLinks.tsx | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
index 3f46f35889fd1..4a7a4938ea970 100644
--- a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
+++ b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx
@@ -105,20 +105,20 @@ export function HeaderLinks(props: Props) {
View and modify your data dictionary
-
-
-
-
- Domains
-
- Manage related groups of data assets
-
-
+
+
+
+
+ Domains
+
+ Manage related groups of data assets
+
+
}
>
From 34aa08b7f38d733adcfe31ca97131e1ea52b49e6 Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Mon, 6 Nov 2023 16:51:05 -0800
Subject: [PATCH 064/792] refactor(ui): Refactor entity page loading indicators
(#9195)
unrelated smoke test failing.
---
.../src/app/entity/EntityPage.tsx | 4 +-
.../containers/profile/EntityProfile.tsx | 3 --
.../profile/header/EntityHeader.tsx | 46 +++++++++++--------
.../header/EntityHeaderLoadingSection.tsx | 29 ++++++++++++
.../src/app/lineage/LineageExplorer.tsx | 7 +--
.../src/app/lineage/LineageLoadingSection.tsx | 27 +++++++++++
6 files changed, 86 insertions(+), 30 deletions(-)
create mode 100644 datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeaderLoadingSection.tsx
create mode 100644 datahub-web-react/src/app/lineage/LineageLoadingSection.tsx
diff --git a/datahub-web-react/src/app/entity/EntityPage.tsx b/datahub-web-react/src/app/entity/EntityPage.tsx
index 09233dbd89f69..916fa41795412 100644
--- a/datahub-web-react/src/app/entity/EntityPage.tsx
+++ b/datahub-web-react/src/app/entity/EntityPage.tsx
@@ -8,7 +8,6 @@ import { useEntityRegistry } from '../useEntityRegistry';
import analytics, { EventType } from '../analytics';
import { decodeUrn } from './shared/utils';
import { useGetGrantedPrivilegesQuery } from '../../graphql/policy.generated';
-import { Message } from '../shared/Message';
import { UnauthorizedPage } from '../authorization/UnauthorizedPage';
import { ErrorSection } from '../shared/error/ErrorSection';
import { VIEW_ENTITY_PAGE } from './shared/constants';
@@ -34,7 +33,7 @@ export const EntityPage = ({ entityType }: Props) => {
const isLineageSupported = entity.isLineageEnabled();
const isLineageMode = useIsLineageMode();
const authenticatedUserUrn = useUserContext()?.user?.urn;
- const { loading, error, data } = useGetGrantedPrivilegesQuery({
+ const { error, data } = useGetGrantedPrivilegesQuery({
variables: {
input: {
actorUrn: authenticatedUserUrn as string,
@@ -71,7 +70,6 @@ export const EntityPage = ({ entityType }: Props) => {
return (
<>
- {loading && }
{error && }
{data && !canViewEntityPage && }
{canViewEntityPage &&
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx
index 5384eb94429ed..74c127cb05dd9 100644
--- a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx
@@ -4,7 +4,6 @@ import { MutationHookOptions, MutationTuple, QueryHookOptions, QueryResult } fro
import styled from 'styled-components/macro';
import { useHistory } from 'react-router';
import { EntityType, Exact } from '../../../../../types.generated';
-import { Message } from '../../../../shared/Message';
import {
getEntityPath,
getOnboardingStepIdsForEntityType,
@@ -274,7 +273,6 @@ export const EntityProfile = ({
}}
>
<>
- {loading && }
{(error && ) ||
(!loading && (
@@ -323,7 +321,6 @@ export const EntityProfile = ({
banner
/>
)}
- {loading && }
{(error && ) || (
{isLineageMode ? (
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
index 97595a515b34d..69389f5dcf6fc 100644
--- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx
@@ -16,6 +16,7 @@ import ShareButton from '../../../../../shared/share/ShareButton';
import { capitalizeFirstLetterOnly } from '../../../../../shared/textUtil';
import { useUserContext } from '../../../../../context/useUserContext';
import { useEntityRegistry } from '../../../../../useEntityRegistry';
+import EntityHeaderLoadingSection from './EntityHeaderLoadingSection';
const TitleWrapper = styled.div`
display: flex;
@@ -81,7 +82,7 @@ type Props = {
};
export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEditable, subHeader }: Props) => {
- const { urn, entityType, entityData } = useEntityData();
+ const { urn, entityType, entityData, loading } = useEntityData();
const refetch = useRefetch();
const me = useUserContext();
const platformName = getPlatformName(entityData);
@@ -99,25 +100,32 @@ export const EntityHeader = ({ headerDropdownItems, headerActionItems, isNameEdi
<>
-
-
-
- {entityData?.deprecation?.deprecated && (
-
- )}
- {entityData?.health && (
- ) || (
+ <>
+
+
+
+ {entityData?.deprecation?.deprecated && (
+
+ )}
+ {entityData?.health && (
+
+ )}
+
+
- )}
-
-
+ >
+ )}
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeaderLoadingSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeaderLoadingSection.tsx
new file mode 100644
index 0000000000000..bbf813804edd4
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeaderLoadingSection.tsx
@@ -0,0 +1,29 @@
+import * as React from 'react';
+import { Skeleton, Space } from 'antd';
+import styled from 'styled-components';
+import { ANTD_GRAY } from '../../../constants';
+
+const ContextSkeleton = styled(Skeleton.Input)`
+ && {
+ width: 320px;
+ border-radius: 4px;
+ background-color: ${ANTD_GRAY[3]};
+ }
+`;
+
+const NameSkeleton = styled(Skeleton.Input)`
+ && {
+ width: 240px;
+ border-radius: 4px;
+ background-color: ${ANTD_GRAY[3]};
+ }
+`;
+
+export default function EntityHeaderLoadingSection() {
+ return (
+
+
+
+
+ );
+}
diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx
index ed0b26bde11ef..f59d1843b8a99 100644
--- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx
+++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx
@@ -3,7 +3,6 @@ import { useHistory } from 'react-router';
import { Button, Drawer } from 'antd';
import { InfoCircleOutlined } from '@ant-design/icons';
import styled from 'styled-components';
-import { Message } from '../shared/Message';
import { useEntityRegistry } from '../useEntityRegistry';
import CompactContext from '../shared/CompactContext';
import { EntityAndType, EntitySelectParams, FetchedEntities } from './types';
@@ -18,12 +17,10 @@ import { ErrorSection } from '../shared/error/ErrorSection';
import usePrevious from '../shared/usePrevious';
import { useGetLineageTimeParams } from './utils/useGetLineageTimeParams';
import analytics, { EventType } from '../analytics';
+import LineageLoadingSection from './LineageLoadingSection';
const DEFAULT_DISTANCE_FROM_TOP = 106;
-const LoadingMessage = styled(Message)`
- margin-top: 10%;
-`;
const FooterButtonGroup = styled.div`
display: flex;
justify-content: space-between;
@@ -167,7 +164,7 @@ export default function LineageExplorer({ urn, type }: Props) {
return (
<>
{error && <ErrorSection />}
- {loading && }
+ {loading && <LineageLoadingSection />}
{!!data && (
+
+
+ );
+}
From 279fdd50d7870cc404a58a5c9afbf6b3c7c432ec Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Mon, 6 Nov 2023 19:51:20 -0600
Subject: [PATCH 065/792] fix(security): fix for zookeeper CVE-2023-44981
(#9190)
---
build.gradle | 4 ++--
metadata-service/restli-api/build.gradle | 6 ++++++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/build.gradle b/build.gradle
index bd282535fa13c..31e005e001cf0 100644
--- a/build.gradle
+++ b/build.gradle
@@ -1,7 +1,7 @@
buildscript {
ext.junitJupiterVersion = '5.6.1'
// Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md
- ext.pegasusVersion = '29.45.0'
+ ext.pegasusVersion = '29.46.8'
ext.mavenVersion = '3.6.3'
ext.springVersion = '5.3.29'
ext.springBootVersion = '2.7.14'
@@ -212,7 +212,7 @@ project.ext.externalDependency = [
'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0',
'typesafeConfig':'com.typesafe:config:1.4.1',
'wiremock':'com.github.tomakehurst:wiremock:2.10.0',
- 'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14',
+ 'zookeeper': 'org.apache.zookeeper:zookeeper:3.7.2',
'wire': 'com.squareup.wire:wire-compiler:3.7.1',
'charle': 'com.charleskorn.kaml:kaml:0.53.0',
'common': 'commons-io:commons-io:2.7',
diff --git a/metadata-service/restli-api/build.gradle b/metadata-service/restli-api/build.gradle
index ed4f4118dba30..f182d11b6baeb 100644
--- a/metadata-service/restli-api/build.gradle
+++ b/metadata-service/restli-api/build.gradle
@@ -8,4 +8,10 @@ dependencies {
restClientCompile spec.product.pegasus.d2
restClientCompile spec.product.pegasus.restliClient
+
+ constraints {
+ restClientCompile(externalDependency.zookeeper) {
+ because("CVE-2023-44981")
+ }
+ }
}
\ No newline at end of file
From ac9a0140570b3ada060ce716304f33ff62a1348a Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Mon, 6 Nov 2023 18:33:02 -0800
Subject: [PATCH 066/792] refactor(ui): Rename "dataset details" button text to
"view details" on lineage sidebar profile (#9196)
---
datahub-web-react/src/app/lineage/LineageExplorer.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx
index f59d1843b8a99..28cd7025f51f4 100644
--- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx
+++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx
@@ -217,7 +217,7 @@ export default function LineageExplorer({ urn, type }: Props) {
Close
- {entityRegistry.getEntityName(selectedEntity.type)} Details
+ View details
)
From 45770013c9bdaadfb49950c67a838aef879a8e8a Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Mon, 6 Nov 2023 18:33:13 -0800
Subject: [PATCH 067/792] feat(ui): Add command-k icons to search bar (#9194)
---
.../src/app/home/HomePageHeader.tsx | 1 +
datahub-web-react/src/app/search/CommandK.tsx | 29 +++++++++++++++
.../src/app/search/SearchBar.tsx | 37 ++++++++++++-------
.../src/app/search/SearchHeader.tsx | 1 +
4 files changed, 55 insertions(+), 13 deletions(-)
create mode 100644 datahub-web-react/src/app/search/CommandK.tsx
diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx
index e5c01252a865b..0052d54f562eb 100644
--- a/datahub-web-react/src/app/home/HomePageHeader.tsx
+++ b/datahub-web-react/src/app/home/HomePageHeader.tsx
@@ -276,6 +276,7 @@ export const HomePageHeader = () => {
combineSiblings
showQuickFilters
showViewAllResults
+ showCommandK
/>
{searchResultsToShow && searchResultsToShow.length > 0 && (
diff --git a/datahub-web-react/src/app/search/CommandK.tsx b/datahub-web-react/src/app/search/CommandK.tsx
new file mode 100644
index 0000000000000..13e55a0e3f266
--- /dev/null
+++ b/datahub-web-react/src/app/search/CommandK.tsx
@@ -0,0 +1,29 @@
+import React from 'react';
+import styled from 'styled-components';
+import { ANTD_GRAY } from '../entity/shared/constants';
+
+const Container = styled.div`
+ color: ${ANTD_GRAY[6]};
+ background-color: #ffffff;
+ opacity: 0.9;
+ border-color: black;
+ border-radius: 6px;
+ border: 1px solid ${ANTD_GRAY[6]};
+ padding-right: 6px;
+ padding-left: 6px;
+ margin-right: 4px;
+ margin-left: 4px;
+`;
+
+const Letter = styled.span`
+ padding: 2px;
+`;
+
+export const CommandK = () => {
+ return (
+ <Container>
+ <Letter>⌘</Letter>
+ <Letter>K</Letter>
+ </Container>
+ );
+};
diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx
index 5f797e68fe0e8..a23ead83caf54 100644
--- a/datahub-web-react/src/app/search/SearchBar.tsx
+++ b/datahub-web-react/src/app/search/SearchBar.tsx
@@ -23,6 +23,7 @@ import { navigateToSearchUrl } from './utils/navigateToSearchUrl';
import ViewAllSearchItem from './ViewAllSearchItem';
import { ViewSelect } from '../entity/view/select/ViewSelect';
import { combineSiblingsInAutoComplete } from './utils/combineSiblingsInAutoComplete';
+import { CommandK } from './CommandK';
const StyledAutoComplete = styled(AutoComplete)`
width: 100%;
@@ -114,6 +115,7 @@ interface Props {
fixAutoComplete?: boolean;
hideRecommendations?: boolean;
showQuickFilters?: boolean;
+ showCommandK?: boolean;
viewsEnabled?: boolean;
combineSiblings?: boolean;
setIsSearchBarFocused?: (isSearchBarFocused: boolean) => void;
@@ -142,6 +144,7 @@ export const SearchBar = ({
fixAutoComplete,
hideRecommendations,
showQuickFilters,
+ showCommandK = false,
viewsEnabled = false,
combineSiblings = false,
setIsSearchBarFocused,
@@ -153,6 +156,8 @@ export const SearchBar = ({
const [searchQuery, setSearchQuery] = useState(initialQuery);
const [selected, setSelected] = useState();
const [isDropdownVisible, setIsDropdownVisible] = useState(false);
+ const [isFocused, setIsFocused] = useState(false);
+
useEffect(() => setSelected(initialQuery), [initialQuery]);
const searchEntityTypes = entityRegistry.getSearchEntityTypes();
@@ -277,11 +282,13 @@ export const SearchBar = ({
function handleFocus() {
if (onFocus) onFocus();
handleSearchBarClick(true);
+ setIsFocused(true);
}
function handleBlur() {
if (onBlur) onBlur();
handleSearchBarClick(false);
+ setIsFocused(false);
}
function handleSearch(query: string, type?: EntityType, appliedQuickFilters?: FacetFilterInput[]) {
@@ -294,18 +301,21 @@ export const SearchBar = ({
const searchInputRef = useRef(null);
useEffect(() => {
- const handleKeyDown = (event) => {
- // Support command-k to select the search bar.
- // 75 is the keyCode for 'k'
- if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) {
- (searchInputRef?.current as any)?.focus();
- }
- };
- document.addEventListener('keydown', handleKeyDown);
- return () => {
- document.removeEventListener('keydown', handleKeyDown);
- };
- }, []);
+ if (showCommandK) {
+ const handleKeyDown = (event) => {
+ // Support command-k to select the search bar.
+ // 75 is the keyCode for 'k'
+ if ((event.metaKey || event.ctrlKey) && event.keyCode === 75) {
+ (searchInputRef?.current as any)?.focus();
+ }
+ };
+ document.addEventListener('keydown', handleKeyDown);
+ return () => {
+ document.removeEventListener('keydown', handleKeyDown);
+ };
+ }
+ return () => null;
+ }, [showCommandK]);
return (
@@ -377,7 +387,7 @@ export const SearchBar = ({
data-testid="search-input"
onFocus={handleFocus}
onBlur={handleBlur}
- allowClear={{ clearIcon: }}
+ allowClear={(isFocused && { clearIcon: }) || false}
prefix={
<>
{viewsEnabled && (
@@ -411,6 +421,7 @@ export const SearchBar = ({
>
}
ref={searchInputRef}
+ suffix={(showCommandK && !isFocused && <CommandK />) || null}
/>
diff --git a/datahub-web-react/src/app/search/SearchHeader.tsx b/datahub-web-react/src/app/search/SearchHeader.tsx
index 91f9753a3d601..76e78a11d3e9d 100644
--- a/datahub-web-react/src/app/search/SearchHeader.tsx
+++ b/datahub-web-react/src/app/search/SearchHeader.tsx
@@ -108,6 +108,7 @@ export const SearchHeader = ({
fixAutoComplete
showQuickFilters
showViewAllResults
+ showCommandK
/>
From 88cde08d060041bfb6f585ed7a486f6ba5886733 Mon Sep 17 00:00:00 2001
From: Chris Collins
Date: Mon, 6 Nov 2023 21:34:17 -0500
Subject: [PATCH 068/792] feat(ui): Update Apollo cache to work with union
types (#9193)
---
datahub-web-react/codegen.yml | 3 ++
datahub-web-react/package.json | 1 +
datahub-web-react/src/App.tsx | 3 ++
datahub-web-react/yarn.lock | 73 ++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+)
diff --git a/datahub-web-react/codegen.yml b/datahub-web-react/codegen.yml
index 96a2bd6137920..35728e8aeb7d4 100644
--- a/datahub-web-react/codegen.yml
+++ b/datahub-web-react/codegen.yml
@@ -20,6 +20,9 @@ generates:
src/types.generated.ts:
plugins:
- 'typescript'
+ src/possibleTypes.generated.ts:
+ plugins:
+ - 'fragment-matcher'
src/:
preset: near-operation-file
presetConfig:
diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json
index 2d9329919fdc1..019295f3e6ffe 100644
--- a/datahub-web-react/package.json
+++ b/datahub-web-react/package.json
@@ -11,6 +11,7 @@
"@apollo/client": "^3.3.19",
"@craco/craco": "^6.1.1",
"@data-ui/xy-chart": "^0.0.84",
+ "@graphql-codegen/fragment-matcher": "^5.0.0",
"@miragejs/graphql": "^0.1.11",
"@monaco-editor/react": "^4.3.1",
"@react-hook/window-size": "^3.0.7",
diff --git a/datahub-web-react/src/App.tsx b/datahub-web-react/src/App.tsx
index b6bc608dccbbb..342a89f350429 100644
--- a/datahub-web-react/src/App.tsx
+++ b/datahub-web-react/src/App.tsx
@@ -36,6 +36,7 @@ import { DataPlatformEntity } from './app/entity/dataPlatform/DataPlatformEntity
import { DataProductEntity } from './app/entity/dataProduct/DataProductEntity';
import { DataPlatformInstanceEntity } from './app/entity/dataPlatformInstance/DataPlatformInstanceEntity';
import { RoleEntity } from './app/entity/Access/RoleEntity';
+import possibleTypesResult from './possibleTypes.generated';
/*
Construct Apollo Client
@@ -77,6 +78,8 @@ const client = new ApolloClient({
},
},
},
+ // need to define possibleTypes to allow us to use Apollo cache with union types
+ possibleTypes: possibleTypesResult.possibleTypes,
}),
credentials: 'include',
defaultOptions: {
diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock
index 590f3ebcef8c3..ce0f2f514dad1 100644
--- a/datahub-web-react/yarn.lock
+++ b/datahub-web-react/yarn.lock
@@ -2298,6 +2298,14 @@
"@graphql-tools/utils" "^6"
tslib "~2.0.1"
+"@graphql-codegen/fragment-matcher@^5.0.0":
+ version "5.0.0"
+ resolved "https://registry.yarnpkg.com/@graphql-codegen/fragment-matcher/-/fragment-matcher-5.0.0.tgz#2a016715e42e8f21aa08830f34a4d0a930e660fe"
+ integrity sha512-mbash9E8eY6RSMSNrrO+C9JJEn8rdr8ORaxMpgdWL2qe2q/TlLUCE3ZvQvHkSc7GjBnMEk36LncA8ApwHR2BHg==
+ dependencies:
+ "@graphql-codegen/plugin-helpers" "^5.0.0"
+ tslib "~2.5.0"
+
"@graphql-codegen/near-operation-file-preset@^1.17.13":
version "1.18.6"
resolved "https://registry.yarnpkg.com/@graphql-codegen/near-operation-file-preset/-/near-operation-file-preset-1.18.6.tgz#2378ac75feaeaa1cfd2146bd84bf839b1fe20d9d"
@@ -2331,6 +2339,18 @@
lodash "~4.17.0"
tslib "~2.3.0"
+"@graphql-codegen/plugin-helpers@^5.0.0":
+ version "5.0.1"
+ resolved "https://registry.yarnpkg.com/@graphql-codegen/plugin-helpers/-/plugin-helpers-5.0.1.tgz#e2429fcfba3f078d5aa18aa062d46c922bbb0d55"
+ integrity sha512-6L5sb9D8wptZhnhLLBcheSPU7Tg//DGWgc5tQBWX46KYTOTQHGqDpv50FxAJJOyFVJrveN9otWk9UT9/yfY4ww==
+ dependencies:
+ "@graphql-tools/utils" "^10.0.0"
+ change-case-all "1.0.15"
+ common-tags "1.8.2"
+ import-from "4.0.0"
+ lodash "~4.17.0"
+ tslib "~2.5.0"
+
"@graphql-codegen/typescript-operations@1.17.13":
version "1.17.13"
resolved "https://registry.yarnpkg.com/@graphql-codegen/typescript-operations/-/typescript-operations-1.17.13.tgz#a5b08c1573b9507ca5a9e66e795aecc40ddc5305"
@@ -2584,6 +2604,16 @@
dependencies:
tslib "^2.4.0"
+"@graphql-tools/utils@^10.0.0":
+ version "10.0.8"
+ resolved "https://registry.yarnpkg.com/@graphql-tools/utils/-/utils-10.0.8.tgz#c7b84275ec83dc42ad9f3d4ffc424ff682075759"
+ integrity sha512-yjyA8ycSa1WRlJqyX/aLqXeE5DvF/H02+zXMUFnCzIDrj0UvLMUrxhmVFnMK0Q2n3bh4uuTeY3621m5za9ovXw==
+ dependencies:
+ "@graphql-typed-document-node/core" "^3.1.1"
+ cross-inspect "1.0.0"
+ dset "^3.1.2"
+ tslib "^2.4.0"
+
"@graphql-tools/utils@^6":
version "6.2.4"
resolved "https://registry.yarnpkg.com/@graphql-tools/utils/-/utils-6.2.4.tgz#38a2314d2e5e229ad4f78cca44e1199e18d55856"
@@ -2618,6 +2648,11 @@
resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.1.0.tgz#0eee6373e11418bfe0b5638f654df7a4ca6a3950"
integrity sha512-wYn6r8zVZyQJ6rQaALBEln5B1pzxb9shV5Ef97kTvn6yVGrqyXVnDqnU24MXnFubR+rZjBY9NWuxX3FB2sTsjg==
+"@graphql-typed-document-node/core@^3.1.1":
+ version "3.2.0"
+ resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861"
+ integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==
+
"@hapi/address@2.x.x":
version "2.1.4"
resolved "https://registry.yarnpkg.com/@hapi/address/-/address-2.1.4.tgz#5d67ed43f3fd41a69d4b9ff7b56e7c0d1d0a81e5"
@@ -7001,6 +7036,22 @@ change-case-all@1.0.14:
upper-case "^2.0.2"
upper-case-first "^2.0.2"
+change-case-all@1.0.15:
+ version "1.0.15"
+ resolved "https://registry.yarnpkg.com/change-case-all/-/change-case-all-1.0.15.tgz#de29393167fc101d646cd76b0ef23e27d09756ad"
+ integrity sha512-3+GIFhk3sNuvFAJKU46o26OdzudQlPNBCu1ZQi3cMeMHhty1bhDxu2WrEilVNYaGvqUtR1VSigFcJOiS13dRhQ==
+ dependencies:
+ change-case "^4.1.2"
+ is-lower-case "^2.0.2"
+ is-upper-case "^2.0.2"
+ lower-case "^2.0.2"
+ lower-case-first "^2.0.2"
+ sponge-case "^1.0.1"
+ swap-case "^2.0.2"
+ title-case "^3.0.3"
+ upper-case "^2.0.2"
+ upper-case-first "^2.0.2"
+
change-case@^4.1.2:
version "4.1.2"
resolved "https://registry.yarnpkg.com/change-case/-/change-case-4.1.2.tgz#fedfc5f136045e2398c0410ee441f95704641e12"
@@ -7357,6 +7408,11 @@ common-tags@1.8.0, common-tags@^1.8.0:
resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.0.tgz#8e3153e542d4a39e9b10554434afaaf98956a937"
integrity sha512-6P6g0uetGpW/sdyUy/iQQCbFF0kWVMSIVSyYz7Zgjcgh8mgw8PQzDNZeyZ5DQ2gM7LBoZPHmnjz8rUthkBG5tw==
+common-tags@1.8.2:
+ version "1.8.2"
+ resolved "https://registry.yarnpkg.com/common-tags/-/common-tags-1.8.2.tgz#94ebb3c076d26032745fd54face7f688ef5ac9c6"
+ integrity sha512-gk/Z852D2Wtb//0I+kRFNKKE9dIIVirjoqPoA1wJU+XePVXZfGeBpk45+A1rKO4Q43prqWBNY/MiIeRLbPWUaA==
+
commondir@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
@@ -7698,6 +7754,13 @@ cross-fetch@^3.1.5:
dependencies:
node-fetch "2.6.7"
+cross-inspect@1.0.0:
+ version "1.0.0"
+ resolved "https://registry.yarnpkg.com/cross-inspect/-/cross-inspect-1.0.0.tgz#5fda1af759a148594d2d58394a9e21364f6849af"
+ integrity sha512-4PFfn4b5ZN6FMNGSZlyb7wUhuN8wvj8t/VQHZdM4JsDcruGJ8L2kf9zao98QIrBPFCpdk27qst/AGTl7pL3ypQ==
+ dependencies:
+ tslib "^2.4.0"
+
cross-spawn@7.0.3, cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3:
version "7.0.3"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@@ -8595,6 +8658,11 @@ dotenv@^8.2.0:
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.6.0.tgz#061af664d19f7f4d8fc6e4ff9b584ce237adcb8b"
integrity sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g==
+dset@^3.1.2:
+ version "3.1.3"
+ resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.3.tgz#c194147f159841148e8e34ca41f638556d9542d2"
+ integrity sha512-20TuZZHCEZ2O71q9/+8BwKwZ0QtD9D8ObhrihJPr+vLLYlSuAU3/zL4cSlgbfeoGHTjCSJBa7NGcrF9/Bx/WJQ==
+
duplexer3@^0.1.4:
version "0.1.4"
resolved "https://registry.yarnpkg.com/duplexer3/-/duplexer3-0.1.4.tgz#ee01dd1cac0ed3cbc7fdbea37dc0a8f1ce002ce2"
@@ -18712,6 +18780,11 @@ tslib@~2.3.0:
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.3.1.tgz#e8a335add5ceae51aa261d32a490158ef042ef01"
integrity sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw==
+tslib@~2.5.0:
+ version "2.5.3"
+ resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.5.3.tgz#24944ba2d990940e6e982c4bea147aba80209913"
+ integrity sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==
+
tsutils@^3.17.1:
version "3.21.0"
resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
From 23c98ecf7a88d11e3b195d457ab42c763818df47 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Tue, 7 Nov 2023 14:40:48 -0600
Subject: [PATCH 069/792] feat(policy): enable support for 10k+ policies
(#9177)
Co-authored-by: Pedro Silva
---
.../policy/ListPoliciesResolver.java | 26 ++----
.../metadata/client/JavaEntityClient.java | 2 +-
.../metadata/search/SearchService.java | 18 ++--
.../authorization/DataHubAuthorizer.java | 21 ++---
.../datahub/authorization/PolicyFetcher.java | 62 +++++++++++---
.../authorization/DataHubAuthorizerTest.java | 82 +++++++++++++------
.../src/main/resources/application.yml | 1 +
.../auth/DataHubAuthorizerFactory.java | 5 +-
.../linkedin/entity/client/EntityClient.java | 2 +-
.../entity/client/RestliEntityClient.java | 7 +-
.../cypress/e2e/settings/managing_groups.js | 2 +-
11 files changed, 153 insertions(+), 75 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java
index 516d6fa2d3137..b44da1c2f832c 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java
@@ -40,23 +40,15 @@ public CompletableFuture get(final DataFetchingEnvironment e
final Integer count = input.getCount() == null ? DEFAULT_COUNT : input.getCount();
final String query = input.getQuery() == null ? DEFAULT_QUERY : input.getQuery();
- return CompletableFuture.supplyAsync(() -> {
- try {
- // First, get all policy Urns.
- final PolicyFetcher.PolicyFetchResult policyFetchResult =
- _policyFetcher.fetchPolicies(start, count, query, context.getAuthentication());
-
- // Now that we have entities we can bind this to a result.
- final ListPoliciesResult result = new ListPoliciesResult();
- result.setStart(start);
- result.setCount(count);
- result.setTotal(policyFetchResult.getTotal());
- result.setPolicies(mapEntities(policyFetchResult.getPolicies()));
- return result;
- } catch (Exception e) {
- throw new RuntimeException("Failed to list policies", e);
- }
- });
+ return _policyFetcher.fetchPolicies(start, query, count, context.getAuthentication())
+ .thenApply(policyFetchResult -> {
+ final ListPoliciesResult result = new ListPoliciesResult();
+ result.setStart(start);
+ result.setCount(count);
+ result.setTotal(policyFetchResult.getTotal());
+ result.setPolicies(mapEntities(policyFetchResult.getPolicies()));
+ return result;
+ });
}
throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator.");
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
index a69c6008fea47..dff9a22de8efd 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java
@@ -381,7 +381,7 @@ public SearchResult searchAcrossEntities(
@Nonnull
@Override
public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input,
- @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count,
+ @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count,
@Nullable SearchFlags searchFlags, @Nonnull Authentication authentication)
throws RemoteInvocationException {
final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true);
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java
index 94b8d57efcc16..c99e4a94feb29 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java
@@ -147,15 +147,23 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul
return result;
}
+ /**
+ * If no entities are provided, fallback to the list of non-empty entities
+ * @param inputEntities the requested entities
+ * @return some entities to search
+ */
private List getEntitiesToSearch(@Nonnull List inputEntities) {
List nonEmptyEntities;
List lowercaseEntities = inputEntities.stream().map(String::toLowerCase).collect(Collectors.toList());
- try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getNonEmptyEntities").time()) {
- nonEmptyEntities = _entityDocCountCache.getNonEmptyEntities();
- }
- if (!inputEntities.isEmpty()) {
- nonEmptyEntities = nonEmptyEntities.stream().filter(lowercaseEntities::contains).collect(Collectors.toList());
+
+ if (lowercaseEntities.isEmpty()) {
+ try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getNonEmptyEntities").time()) {
+ nonEmptyEntities = _entityDocCountCache.getNonEmptyEntities();
+ }
+ } else {
+ nonEmptyEntities = lowercaseEntities;
}
+
return nonEmptyEntities;
}
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
index f8b28f6c182a7..f8f99475de23e 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
@@ -72,11 +72,13 @@ public DataHubAuthorizer(
final EntityClient entityClient,
final int delayIntervalSeconds,
final int refreshIntervalSeconds,
- final AuthorizationMode mode) {
+ final AuthorizationMode mode,
+ final int policyFetchSize) {
_systemAuthentication = Objects.requireNonNull(systemAuthentication);
_mode = Objects.requireNonNull(mode);
_policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient));
- _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, readWriteLock.writeLock());
+ _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache,
+ readWriteLock.writeLock(), policyFetchSize);
_refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS);
}
@@ -244,29 +246,28 @@ static class PolicyRefreshRunnable implements Runnable {
private final PolicyFetcher _policyFetcher;
private final Map> _policyCache;
private final Lock writeLock;
+ private final int count;
@Override
public void run() {
try {
// Populate new cache and swap.
Map> newCache = new HashMap<>();
+ Integer total = null;
+ String scrollId = null;
- int start = 0;
- int count = 30;
- int total = 30;
-
- while (start < total) {
+ while (total == null || scrollId != null) {
try {
final PolicyFetcher.PolicyFetchResult
- policyFetchResult = _policyFetcher.fetchPolicies(start, count, _systemAuthentication);
+ policyFetchResult = _policyFetcher.fetchPolicies(count, scrollId, _systemAuthentication);
addPoliciesToCache(newCache, policyFetchResult.getPolicies());
total = policyFetchResult.getTotal();
- start = start + count;
+ scrollId = policyFetchResult.getScrollId();
} catch (Exception e) {
log.error(
- "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. start: {}, count: {}", start, count, e);
+ "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. count: {}, scrollId: {}", count, scrollId, e);
return;
}
}
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java
index 92d12bad41c9f..c06da4d245f91 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyFetcher.java
@@ -8,8 +8,8 @@
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.SortCriterion;
import com.linkedin.metadata.query.filter.SortOrder;
+import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchEntity;
-import com.linkedin.metadata.search.SearchResult;
import com.linkedin.policy.DataHubPolicyInfo;
import com.linkedin.r2.RemoteInvocationException;
import java.net.URISyntaxException;
@@ -18,11 +18,14 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
+import javax.annotation.Nullable;
+
import static com.linkedin.metadata.Constants.DATAHUB_POLICY_INFO_ASPECT_NAME;
import static com.linkedin.metadata.Constants.POLICY_ENTITY_NAME;
@@ -38,22 +41,53 @@ public class PolicyFetcher {
private static final SortCriterion POLICY_SORT_CRITERION =
new SortCriterion().setField("lastUpdatedTimestamp").setOrder(SortOrder.DESCENDING);
- public PolicyFetchResult fetchPolicies(int start, int count, Authentication authentication)
- throws RemoteInvocationException, URISyntaxException {
- return fetchPolicies(start, count, "", authentication);
+ /**
+ * This is to provide a scroll implementation using the start/count api. It is not efficient
+ * and the scroll native functions should be used instead. This does fix a failure to fetch
+ * policies when deep pagination happens where there are >10k policies.
+ * Exists primarily to prevent breaking change to the graphql api.
+ */
+ @Deprecated
+ public CompletableFuture fetchPolicies(int start, String query, int count, Authentication authentication) {
+ return CompletableFuture.supplyAsync(() -> {
+ try {
+ PolicyFetchResult result = PolicyFetchResult.EMPTY;
+ String scrollId = "";
+ int fetchedResults = 0;
+
+ while (PolicyFetchResult.EMPTY.equals(result) && scrollId != null) {
+ PolicyFetchResult tmpResult = fetchPolicies(query, count, scrollId.isEmpty() ? null : scrollId, authentication);
+ fetchedResults += tmpResult.getPolicies().size();
+ scrollId = tmpResult.getScrollId();
+ if (fetchedResults > start) {
+ result = tmpResult;
+ }
+ }
+
+ return result;
+ } catch (Exception e) {
+ throw new RuntimeException("Failed to list policies", e);
+ }
+ });
}
- public PolicyFetchResult fetchPolicies(int start, int count, String query, Authentication authentication)
+ public PolicyFetchResult fetchPolicies(int count, @Nullable String scrollId, Authentication authentication)
+ throws RemoteInvocationException, URISyntaxException {
+ return fetchPolicies("", count, scrollId, authentication);
+ }
+
+ public PolicyFetchResult fetchPolicies(String query, int count, @Nullable String scrollId, Authentication authentication)
throws RemoteInvocationException, URISyntaxException {
- log.debug(String.format("Batch fetching policies. start: %s, count: %s ", start, count));
- // First fetch all policy urns from start - start + count
- SearchResult result =
- _entityClient.search(POLICY_ENTITY_NAME, query, null, POLICY_SORT_CRITERION, start, count, authentication,
- new SearchFlags().setFulltext(true));
+ log.debug(String.format("Batch fetching policies. count: %s, scroll: %s", count, scrollId));
+
+ // First fetch all policy urns
+ ScrollResult result = _entityClient.scrollAcrossEntities(List.of(POLICY_ENTITY_NAME), query, null, scrollId,
+ null, count, new SearchFlags().setSkipCache(true).setSkipAggregates(true)
+ .setSkipHighlighting(true).setFulltext(true), authentication);
List policyUrns = result.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList());
if (policyUrns.isEmpty()) {
- return new PolicyFetchResult(Collections.emptyList(), 0);
+ return PolicyFetchResult.EMPTY;
}
// Fetch DataHubPolicyInfo aspects for each urn
@@ -64,7 +98,7 @@ public PolicyFetchResult fetchPolicies(int start, int count, String query, Authe
.filter(Objects::nonNull)
.map(this::extractPolicy)
.filter(Objects::nonNull)
- .collect(Collectors.toList()), result.getNumEntities());
+ .collect(Collectors.toList()), result.getNumEntities(), result.getScrollId());
}
private Policy extractPolicy(EntityResponse entityResponse) {
@@ -82,6 +116,10 @@ private Policy extractPolicy(EntityResponse entityResponse) {
public static class PolicyFetchResult {
List policies;
int total;
+ @Nullable
+ String scrollId;
+
+ public static final PolicyFetchResult EMPTY = new PolicyFetchResult(Collections.emptyList(), 0, null);
}
@Value
diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
index 24ecfa6fefc85..babb1c5d00ee8 100644
--- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
+++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
@@ -22,6 +22,7 @@
import com.linkedin.entity.EnvelopedAspectMap;
import com.linkedin.entity.client.EntityClient;
import com.linkedin.metadata.query.SearchFlags;
+import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchEntity;
import com.linkedin.metadata.search.SearchEntityArray;
import com.linkedin.metadata.search.SearchResult;
@@ -35,6 +36,8 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
+
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -89,30 +92,58 @@ public void setupTest() throws Exception {
final EnvelopedAspectMap childDomainPolicyAspectMap = new EnvelopedAspectMap();
childDomainPolicyAspectMap.put(DATAHUB_POLICY_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(childDomainPolicy.data())));
- final SearchResult policySearchResult = new SearchResult();
- policySearchResult.setNumEntities(3);
- policySearchResult.setEntities(
- new SearchEntityArray(
- ImmutableList.of(
- new SearchEntity().setEntity(activePolicyUrn),
- new SearchEntity().setEntity(inactivePolicyUrn),
- new SearchEntity().setEntity(parentDomainPolicyUrn),
- new SearchEntity().setEntity(childDomainPolicyUrn)
- )
- )
- );
-
- when(_entityClient.search(eq("dataHubPolicy"), eq(""), isNull(), any(), anyInt(), anyInt(), any(),
- eq(new SearchFlags().setFulltext(true)))).thenReturn(policySearchResult);
- when(_entityClient.batchGetV2(eq(POLICY_ENTITY_NAME),
- eq(ImmutableSet.of(activePolicyUrn, inactivePolicyUrn, parentDomainPolicyUrn, childDomainPolicyUrn)), eq(null), any())).thenReturn(
- ImmutableMap.of(
- activePolicyUrn, new EntityResponse().setUrn(activePolicyUrn).setAspects(activeAspectMap),
- inactivePolicyUrn, new EntityResponse().setUrn(inactivePolicyUrn).setAspects(inactiveAspectMap),
- parentDomainPolicyUrn, new EntityResponse().setUrn(parentDomainPolicyUrn).setAspects(parentDomainPolicyAspectMap),
- childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspects(childDomainPolicyAspectMap)
- )
- );
+ final ScrollResult policySearchResult1 = new ScrollResult()
+ .setScrollId("1")
+ .setNumEntities(4)
+ .setEntities(
+ new SearchEntityArray(
+ ImmutableList.of(new SearchEntity().setEntity(activePolicyUrn))));
+
+ final ScrollResult policySearchResult2 = new ScrollResult()
+ .setScrollId("2")
+ .setNumEntities(4)
+ .setEntities(
+ new SearchEntityArray(
+ ImmutableList.of(new SearchEntity().setEntity(inactivePolicyUrn))));
+
+ final ScrollResult policySearchResult3 = new ScrollResult()
+ .setScrollId("3")
+ .setNumEntities(4)
+ .setEntities(
+ new SearchEntityArray(
+ ImmutableList.of(new SearchEntity().setEntity(parentDomainPolicyUrn))));
+
+ final ScrollResult policySearchResult4 = new ScrollResult()
+ .setNumEntities(4)
+ .setEntities(
+ new SearchEntityArray(
+ ImmutableList.of(
+ new SearchEntity().setEntity(childDomainPolicyUrn))));
+
+ when(_entityClient.scrollAcrossEntities(eq(List.of("dataHubPolicy")), eq(""), isNull(), any(), isNull(),
+ anyInt(), eq(new SearchFlags().setFulltext(true).setSkipAggregates(true).setSkipHighlighting(true).setSkipCache(true)), any()))
+ .thenReturn(policySearchResult1)
+ .thenReturn(policySearchResult2)
+ .thenReturn(policySearchResult3)
+ .thenReturn(policySearchResult4);
+
+ when(_entityClient.batchGetV2(eq(POLICY_ENTITY_NAME), any(), eq(null), any())).thenAnswer(args -> {
+ Set inputUrns = args.getArgument(1);
+ Urn urn = inputUrns.stream().findFirst().get();
+
+ switch (urn.toString()) {
+ case "urn:li:dataHubPolicy:0":
+ return Map.of(activePolicyUrn, new EntityResponse().setUrn(activePolicyUrn).setAspects(activeAspectMap));
+ case "urn:li:dataHubPolicy:1":
+ return Map.of(inactivePolicyUrn, new EntityResponse().setUrn(inactivePolicyUrn).setAspects(inactiveAspectMap));
+ case "urn:li:dataHubPolicy:2":
+ return Map.of(parentDomainPolicyUrn, new EntityResponse().setUrn(parentDomainPolicyUrn).setAspects(parentDomainPolicyAspectMap));
+ case "urn:li:dataHubPolicy:3":
+ return Map.of(childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspects(childDomainPolicyAspectMap));
+ default:
+ throw new IllegalStateException();
+ }
+ });
final List userUrns = ImmutableList.of(Urn.createFromString("urn:li:corpuser:user3"), Urn.createFromString("urn:li:corpuser:user4"));
final List groupUrns = ImmutableList.of(Urn.createFromString("urn:li:corpGroup:group3"), Urn.createFromString("urn:li:corpGroup:group4"));
@@ -146,7 +177,8 @@ childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspec
_entityClient,
10,
10,
- DataHubAuthorizer.AuthorizationMode.DEFAULT
+ DataHubAuthorizer.AuthorizationMode.DEFAULT,
+ 1 // force pagination logic
);
_dataHubAuthorizer.init(Collections.emptyMap(), createAuthorizerContext(systemAuthentication, _entityClient));
_dataHubAuthorizer.invalidateCache();
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index 91b10a75c922e..e9113d339e81d 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -39,6 +39,7 @@ authorization:
defaultAuthorizer:
enabled: ${AUTH_POLICIES_ENABLED:true}
cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120}
+ cachePolicyFetchSize: ${POLICY_CACHE_FETCH_SIZE:1000}
# Enables authorization of reads, writes, and deletes on REST APIs. Defaults to false for backwards compatibility, but should become true down the road
restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:false}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
index 5b298a453547a..663234e2519fa 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
@@ -32,6 +32,9 @@ public class DataHubAuthorizerFactory {
@Value("${authorization.defaultAuthorizer.cacheRefreshIntervalSecs}")
private Integer policyCacheRefreshIntervalSeconds;
+ @Value("${authorization.defaultAuthorizer.cachePolicyFetchSize}")
+ private Integer policyCacheFetchSize;
+
@Value("${authorization.defaultAuthorizer.enabled:true}")
private Boolean policiesEnabled;
@@ -44,6 +47,6 @@ protected DataHubAuthorizer getInstance() {
: DataHubAuthorizer.AuthorizationMode.ALLOW_ALL;
return new DataHubAuthorizer(systemAuthentication, entityClient, 10,
- policyCacheRefreshIntervalSeconds, mode);
+ policyCacheRefreshIntervalSeconds, mode, policyCacheFetchSize);
}
}
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
index b9661ec75e1b1..84d0ed6b9594d 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
@@ -241,7 +241,7 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul
*/
@Nonnull
ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input,
- @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count, @Nullable SearchFlags searchFlags,
+ @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count, @Nullable SearchFlags searchFlags,
@Nonnull Authentication authentication)
throws RemoteInvocationException;
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
index 47a00e711a935..2716e27518fcc 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
@@ -482,11 +482,11 @@ public SearchResult searchAcrossEntities(@Nonnull List entities, @Nonnul
@Nonnull
@Override
public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input,
- @Nullable Filter filter, @Nullable String scrollId, @Nonnull String keepAlive, int count,
+ @Nullable Filter filter, @Nullable String scrollId, @Nullable String keepAlive, int count,
@Nullable SearchFlags searchFlags, @Nonnull Authentication authentication)
throws RemoteInvocationException {
final EntitiesDoScrollAcrossEntitiesRequestBuilder requestBuilder =
- ENTITIES_REQUEST_BUILDERS.actionScrollAcrossEntities().inputParam(input).countParam(count).keepAliveParam(keepAlive);
+ ENTITIES_REQUEST_BUILDERS.actionScrollAcrossEntities().inputParam(input).countParam(count);
if (entities != null) {
requestBuilder.entitiesParam(new StringArray(entities));
@@ -500,6 +500,9 @@ public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnul
if (searchFlags != null) {
requestBuilder.searchFlagsParam(searchFlags);
}
+ if (keepAlive != null) {
+ requestBuilder.keepAliveParam(keepAlive);
+ }
return sendClientRequest(requestBuilder, authentication).getEntity();
}
diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
index 9559435ff01c8..8d689c7e2303c 100644
--- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
+++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js
@@ -81,7 +81,7 @@ describe("create and manage group", () => {
cy.focused().type(expected_name);
cy.get(".ant-select-item-option").contains(expected_name, { matchCase: false }).click();
cy.focused().blur();
- cy.contains(expected_name).should("have.length", 1);
+ cy.contains(expected_name, { matchCase: false }).should("have.length", 1);
cy.get('[role="dialog"] button').contains("Done").click();
cy.waitTextVisible("Owners Added");
cy.contains(expected_name, { matchCase: false }).should("be.visible");
From 353584c10acbee7554c2eb255512173f24e86785 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Tue, 7 Nov 2023 18:22:18 -0600
Subject: [PATCH 070/792] feat(browsepathv2): Allow system-update to reprocess
browse paths v2 (#9200)
---
.../steps/BackfillBrowsePathsV2Step.java | 86 ++++++++++++++-----
.../env/docker-without-neo4j.env | 1 +
docker/datahub-upgrade/env/docker.env | 1 +
docker/docker-compose.dev.yml | 4 +
.../docker-compose-m1.quickstart.yml | 1 +
...er-compose-without-neo4j-m1.quickstart.yml | 1 +
...ocker-compose-without-neo4j.quickstart.yml | 1 +
.../quickstart/docker-compose.quickstart.yml | 1 +
.../client/CachingEntitySearchService.java | 16 ++--
.../elasticsearch/query/ESSearchDAO.java | 4 +-
.../query/request/SearchRequestHandler.java | 8 +-
.../src/main/resources/application.yml | 2 +
.../metadata/search/EntitySearchService.java | 4 +-
13 files changed, 94 insertions(+), 36 deletions(-)
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
index 7547186ccfb23..08a752d9597f4 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
@@ -6,6 +6,7 @@
import com.linkedin.common.BrowsePathsV2;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.upgrade.UpgradeContext;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.UpgradeStepResult;
@@ -13,6 +14,7 @@
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.entity.EntityService;
+import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
@@ -37,6 +39,8 @@
public class BackfillBrowsePathsV2Step implements UpgradeStep {
public static final String BACKFILL_BROWSE_PATHS_V2 = "BACKFILL_BROWSE_PATHS_V2";
+ public static final String REPROCESS_DEFAULT_BROWSE_PATHS_V2 = "REPROCESS_DEFAULT_BROWSE_PATHS_V2";
+ public static final String DEFAULT_BROWSE_PATH_V2 = "␟Default";
private static final Set ENTITY_TYPES_TO_MIGRATE = ImmutableSet.of(
Constants.DATASET_ENTITY_NAME,
@@ -81,27 +85,14 @@ public Function executable() {
private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, String scrollId) {
- // Condition: has `browsePaths` AND does NOT have `browsePathV2`
- Criterion missingBrowsePathV2 = new Criterion();
- missingBrowsePathV2.setCondition(Condition.IS_NULL);
- missingBrowsePathV2.setField("browsePathV2");
- // Excludes entities without browsePaths
- Criterion hasBrowsePathV1 = new Criterion();
- hasBrowsePathV1.setCondition(Condition.EXISTS);
- hasBrowsePathV1.setField("browsePaths");
-
- CriterionArray criterionArray = new CriterionArray();
- criterionArray.add(missingBrowsePathV2);
- criterionArray.add(hasBrowsePathV1);
-
- ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
- conjunctiveCriterion.setAnd(criterionArray);
+ final Filter filter;
- ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
- conjunctiveCriterionArray.add(conjunctiveCriterion);
-
- Filter filter = new Filter();
- filter.setOr(conjunctiveCriterionArray);
+ if (System.getenv().containsKey(REPROCESS_DEFAULT_BROWSE_PATHS_V2)
+ && Boolean.parseBoolean(System.getenv(REPROCESS_DEFAULT_BROWSE_PATHS_V2))) {
+ filter = backfillDefaultBrowsePathsV2Filter();
+ } else {
+ filter = backfillBrowsePathsV2Filter();
+ }
final ScrollResult scrollResult = _searchService.scrollAcrossEntities(
ImmutableList.of(entityType),
@@ -109,9 +100,9 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S
filter,
null,
scrollId,
- "5m",
+ null,
BATCH_SIZE,
- null
+ new SearchFlags().setFulltext(true).setSkipCache(true).setSkipHighlighting(true).setSkipAggregates(true)
);
if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) {
return null;
@@ -129,6 +120,55 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S
return scrollResult.getScrollId();
}
+ /**
+  * Builds the filter used by the regular backfill pass.
+  *
+  * <p>The filter is a single AND-clause wrapped in the filter's OR list: the
+  * {@code browsePathV2} field must be null and the {@code browsePaths} field must exist.
+  *
+  * @return filter selecting entities that have browse paths v1 but no browse path v2
+  */
+ private Filter backfillBrowsePathsV2Filter() {
+   // Entity has not been given a browsePathV2 yet.
+   final Criterion withoutBrowsePathV2 = new Criterion()
+       .setCondition(Condition.IS_NULL)
+       .setField("browsePathV2");
+   // Entity carries legacy browsePaths; entities without them are excluded.
+   final Criterion withBrowsePathV1 = new Criterion()
+       .setCondition(Condition.EXISTS)
+       .setField("browsePaths");
+
+   // Both criteria must hold together (AND).
+   final CriterionArray allOf = new CriterionArray();
+   allOf.add(withoutBrowsePathV2);
+   allOf.add(withBrowsePathV1);
+
+   // The filter's top level is an OR over conjunctions; only one conjunction is needed here.
+   final ConjunctiveCriterionArray anyOf = new ConjunctiveCriterionArray();
+   anyOf.add(new ConjunctiveCriterion().setAnd(allOf));
+
+   return new Filter().setOr(anyOf);
+ }
+
+ /**
+  * Builds the filter used when reprocessing of default browse paths v2 is requested.
+  *
+  * <p>The filter is a single AND-clause wrapped in the filter's OR list, holding one
+  * criterion: {@code browsePathV2} EQUAL to {@link #DEFAULT_BROWSE_PATH_V2}.
+  *
+  * @return filter selecting entities whose browse path v2 is the default value
+  */
+ private Filter backfillDefaultBrowsePathsV2Filter() {
+   final StringArray defaultPathValues = new StringArray();
+   defaultPathValues.add(DEFAULT_BROWSE_PATH_V2);
+
+   // Entity's browsePathV2 equals the default placeholder path.
+   final Criterion isDefaultBrowsePathV2 = new Criterion()
+       .setCondition(Condition.EQUAL)
+       .setField("browsePathV2")
+       .setValues(defaultPathValues)
+       // NOTE(review): the singular value is presumably unused when values is set, but it
+       // appears to be a required field on Criterion - confirm against the Criterion schema.
+       .setValue(DEFAULT_BROWSE_PATH_V2);
+
+   final CriterionArray allOf = new CriterionArray();
+   allOf.add(isDefaultBrowsePathV2);
+
+   // The filter's top level is an OR over conjunctions; only one conjunction is needed here.
+   final ConjunctiveCriterionArray anyOf = new ConjunctiveCriterionArray();
+   anyOf.add(new ConjunctiveCriterion().setAnd(allOf));
+
+   return new Filter().setOr(anyOf);
+ }
+
private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception {
BrowsePathsV2 browsePathsV2 = _entityService.buildDefaultBrowsePathV2(urn, true);
log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2));
@@ -142,7 +182,7 @@ private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exceptio
_entityService.ingestProposal(
proposal,
auditStamp,
- false
+ true
);
}
diff --git a/docker/datahub-upgrade/env/docker-without-neo4j.env b/docker/datahub-upgrade/env/docker-without-neo4j.env
index c399f71b7b15c..04d888f076cd6 100644
--- a/docker/datahub-upgrade/env/docker-without-neo4j.env
+++ b/docker/datahub-upgrade/env/docker-without-neo4j.env
@@ -21,6 +21,7 @@ DATAHUB_GMS_PORT=8080
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
BACKFILL_BROWSE_PATHS_V2=true
+REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false}
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=
diff --git a/docker/datahub-upgrade/env/docker.env b/docker/datahub-upgrade/env/docker.env
index 491470406153b..b2a0d01e5d4ae 100644
--- a/docker/datahub-upgrade/env/docker.env
+++ b/docker/datahub-upgrade/env/docker.env
@@ -25,6 +25,7 @@ DATAHUB_GMS_PORT=8080
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
BACKFILL_BROWSE_PATHS_V2=true
+REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false}
# Uncomment and set these to support SSL connection to Elasticsearch
# ELASTICSEARCH_USE_SSL=
diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml
index c4e5ee7fa0cae..774c4e17bee21 100644
--- a/docker/docker-compose.dev.yml
+++ b/docker/docker-compose.dev.yml
@@ -54,6 +54,8 @@ services:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-upgrade:
image: acryldata/datahub-upgrade:debug
+ ports:
+ - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003
build:
context: datahub-upgrade
dockerfile: Dockerfile
@@ -63,6 +65,8 @@ services:
- SKIP_ELASTICSEARCH_CHECK=false
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-dev}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
+ - REPROCESS_DEFAULT_BROWSE_PATHS_V2=${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false}
+ - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003
volumes:
- ../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/
- ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index 3b6d02c83d0f0..c96baf37551b2 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -151,6 +151,7 @@ services:
- DATAHUB_GMS_PORT=8080
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- BACKFILL_BROWSE_PATHS_V2=true
+ - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
labels:
diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
index e45bafc3da480..b1cb6c208a42d 100644
--- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml
@@ -144,6 +144,7 @@ services:
- DATAHUB_GMS_PORT=8080
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- BACKFILL_BROWSE_PATHS_V2=true
+ - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
labels:
diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
index 020ef5e9a97b9..ab5182bf98ae5 100644
--- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
+++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml
@@ -144,6 +144,7 @@ services:
- DATAHUB_GMS_PORT=8080
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- BACKFILL_BROWSE_PATHS_V2=true
+ - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
labels:
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index 8adc2b9063b84..8a66521cbb522 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -151,6 +151,7 @@ services:
- DATAHUB_GMS_PORT=8080
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- BACKFILL_BROWSE_PATHS_V2=true
+ - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
labels:
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java
index ceaf37a1289d9..db414d70603dc 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java
@@ -16,7 +16,7 @@
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.RequiredArgsConstructor;
-import org.javatuples.Quintet;
+import org.javatuples.Septet;
import org.javatuples.Sextet;
import org.springframework.cache.Cache;
import org.springframework.cache.CacheManager;
@@ -154,8 +154,9 @@ public SearchResult getCachedSearchResults(
batchSize,
querySize -> getRawSearchResults(entityNames, query, filters, sortCriterion, querySize.getFrom(),
querySize.getSize(), flags, facets),
- querySize -> Sextet.with(entityNames, query, filters != null ? toJsonString(filters) : null,
- sortCriterion != null ? toJsonString(sortCriterion) : null, facets, querySize), flags, enableCache).getSearchResults(from, size);
+ querySize -> Septet.with(entityNames, query, filters != null ? toJsonString(filters) : null,
+ sortCriterion != null ? toJsonString(sortCriterion) : null, flags != null ? toJsonString(flags) : null,
+ facets, querySize), flags, enableCache).getSearchResults(from, size);
}
@@ -175,7 +176,8 @@ public AutoCompleteResult getCachedAutoCompleteResults(
if (enableCache(flags)) {
try (Timer.Context ignored2 = MetricUtils.timer(this.getClass(), "getCachedAutoCompleteResults_cache").time()) {
Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "autocomplete_cache_access").time();
- Object cacheKey = Quintet.with(entityName, input, field, filters != null ? toJsonString(filters) : null, limit);
+ Object cacheKey = Sextet.with(entityName, input, field, filters != null ? toJsonString(filters) : null,
+ flags != null ? toJsonString(flags) : null, limit);
String json = cache.get(cacheKey, String.class);
result = json != null ? toRecordTemplate(AutoCompleteResult.class, json) : null;
cacheAccess.stop();
@@ -210,7 +212,8 @@ public BrowseResult getCachedBrowseResults(
if (enableCache(flags)) {
try (Timer.Context ignored2 = MetricUtils.timer(this.getClass(), "getCachedBrowseResults_cache").time()) {
Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "browse_cache_access").time();
- Object cacheKey = Quintet.with(entityName, path, filters != null ? toJsonString(filters) : null, from, size);
+ Object cacheKey = Sextet.with(entityName, path, filters != null ? toJsonString(filters) : null,
+ flags != null ? toJsonString(flags) : null, from, size);
String json = cache.get(cacheKey, String.class);
result = json != null ? toRecordTemplate(BrowseResult.class, json) : null;
cacheAccess.stop();
@@ -247,9 +250,10 @@ public ScrollResult getCachedScrollResults(
ScrollResult result;
if (enableCache(flags)) {
Timer.Context cacheAccess = MetricUtils.timer(this.getClass(), "scroll_cache_access").time();
- Object cacheKey = Sextet.with(entities, query,
+ Object cacheKey = Septet.with(entities, query,
filters != null ? toJsonString(filters) : null,
sortCriterion != null ? toJsonString(sortCriterion) : null,
+ flags != null ? toJsonString(flags) : null,
scrollId, size);
String json = cache.get(cacheKey, String.class);
result = json != null ? toRecordTemplate(ScrollResult.class, json) : null;
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
index cbaf70ca22617..290e8c60deb00 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java
@@ -157,7 +157,7 @@ private AggregationMetadataArray transformIndexIntoEntityName(AggregationMetadat
@Nonnull
@WithSpan
private ScrollResult executeAndExtract(@Nonnull List entitySpecs, @Nonnull SearchRequest searchRequest, @Nullable Filter filter,
- @Nullable String scrollId, @Nonnull String keepAlive, int size) {
+ @Nullable String scrollId, @Nullable String keepAlive, int size) {
try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "executeAndExtract_scroll").time()) {
final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
// extract results, validated against document model as well
@@ -166,7 +166,7 @@ private ScrollResult executeAndExtract(@Nonnull List entitySpecs, @N
.extractScrollResult(searchResponse,
filter, scrollId, keepAlive, size, supportsPointInTime()));
} catch (Exception e) {
- log.error("Search query failed", e);
+ log.error("Search query failed: {}", searchRequest, e);
throw new ESQueryException("Search query failed:", e);
}
}
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
index 49571a60d5f21..0df6afd49c373 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java
@@ -241,7 +241,9 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi
BoolQueryBuilder filterQuery = getFilterQuery(filter);
searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery));
- _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation);
+ if (!finalSearchFlags.isSkipAggregates()) {
+ _aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation);
+ }
if (!finalSearchFlags.isSkipHighlighting()) {
searchSourceBuilder.highlighter(_highlights);
}
@@ -366,7 +368,7 @@ public SearchResult extractResult(@Nonnull SearchResponse searchResponse, Filter
@WithSpan
public ScrollResult extractScrollResult(@Nonnull SearchResponse searchResponse, Filter filter, @Nullable String scrollId,
- @Nonnull String keepAlive, int size, boolean supportsPointInTime) {
+ @Nullable String keepAlive, int size, boolean supportsPointInTime) {
int totalCount = (int) searchResponse.getHits().getTotalHits().value;
List resultList = getResults(searchResponse);
SearchResultMetadata searchResultMetadata = extractSearchResultMetadata(searchResponse, filter);
@@ -376,7 +378,7 @@ public ScrollResult extractScrollResult(@Nonnull SearchResponse searchResponse,
if (searchHits.length == size) {
Object[] sort = searchHits[searchHits.length - 1].getSortValues();
long expirationTimeMs = 0L;
- if (supportsPointInTime) {
+ if (keepAlive != null && supportsPointInTime) {
expirationTimeMs = TimeValue.parseTimeValue(keepAlive, "expirationTime").getMillis() + System.currentTimeMillis();
}
nextScrollId = new SearchAfterWrapper(sort, searchResponse.pointInTimeId(), expirationTimeMs).toScrollId();
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index e9113d339e81d..a06891699607b 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -285,6 +285,8 @@ bootstrap:
enabled: ${UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED:false} # enable to run the upgrade to migrate legacy default browse paths to new ones
backfillBrowsePathsV2:
enabled: ${BACKFILL_BROWSE_PATHS_V2:false} # Enables running the backfill of browsePathsV2 upgrade step. There are concerns about the load of this step so hiding it behind a flag. Deprecating in favor of running through SystemUpdate
+ reprocessDefaultBrowsePathsV2:
+ enabled: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:false} # reprocess V2 browse paths which were set to the default: {"path":[{"id":"Default"}]}
policies:
file: ${BOOTSTRAP_POLICIES_FILE:classpath:boot/policies.json}
# eg for local file
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
index 64f59780b887f..cbfeeaef860d3 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
@@ -193,7 +193,7 @@ BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nullable
*/
@Nonnull
ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters,
- @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags);
+ @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags);
/**
* Gets a list of documents that match given search request. The results are aggregated and filters are applied to the
@@ -210,7 +210,7 @@ ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull String inpu
*/
@Nonnull
ScrollResult structuredScroll(@Nonnull List entities, @Nonnull String input, @Nullable Filter postFilters,
- @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, @Nullable SearchFlags searchFlags);
+ @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags);
/**
* Max result size returned by the underlying search backend
From e73e92699947084b5ecb1f5d3e0c5762dc446bbf Mon Sep 17 00:00:00 2001
From: Shubham Jagtap <132359390+shubhamjagtap639@users.noreply.github.com>
Date: Wed, 8 Nov 2023 12:32:41 +0530
Subject: [PATCH 071/792] feat(integration/fivetran): Fivetran connector
integration (#9018)
Co-authored-by: Harshal Sheth
---
.../app/ingest/source/builder/constants.ts | 4 +
.../app/ingest/source/builder/sources.json | 7 +
datahub-web-react/src/images/fivetranlogo.png | Bin 0 -> 10230 bytes
.../docs/sources/fivetran/fivetran_pre.md | 86 +++
.../docs/sources/fivetran/fivetran_recipe.yml | 43 ++
metadata-ingestion/setup.py | 3 +
.../datahub/api/entities/datajob/datajob.py | 25 +-
.../dataprocess/dataprocess_instance.py | 27 +-
metadata-ingestion/src/datahub/emitter/mcp.py | 4 +-
.../datahub/ingestion/api/source_helpers.py | 13 +-
.../ingestion/source/fivetran/__init__.py | 0
.../ingestion/source/fivetran/config.py | 145 ++++
.../ingestion/source/fivetran/data_classes.py | 36 +
.../ingestion/source/fivetran/fivetran.py | 289 ++++++++
.../source/fivetran/fivetran_log_api.py | 147 ++++
.../source/fivetran/fivetran_query.py | 76 ++
.../ingestion/source_config/sql/snowflake.py | 82 ++-
.../integration/fivetran/fivetran_golden.json | 658 ++++++++++++++++++
.../integration/fivetran/test_fivetran.py | 192 +++++
.../main/resources/boot/data_platforms.json | 10 +
20 files changed, 1777 insertions(+), 70 deletions(-)
create mode 100644 datahub-web-react/src/images/fivetranlogo.png
create mode 100644 metadata-ingestion/docs/sources/fivetran/fivetran_pre.md
create mode 100644 metadata-ingestion/docs/sources/fivetran/fivetran_recipe.yml
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/__init__.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/data_classes.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
create mode 100644 metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
create mode 100644 metadata-ingestion/tests/integration/fivetran/test_fivetran.py
diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts
index dba8e8bb1dce6..fdb094d721304 100644
--- a/datahub-web-react/src/app/ingest/source/builder/constants.ts
+++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts
@@ -29,6 +29,7 @@ import databricksLogo from '../../../../images/databrickslogo.png';
import verticaLogo from '../../../../images/verticalogo.png';
import mlflowLogo from '../../../../images/mlflowlogo.png';
import dynamodbLogo from '../../../../images/dynamodblogo.png';
+import fivetranLogo from '../../../../images/fivetranlogo.png';
export const ATHENA = 'athena';
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
@@ -105,6 +106,8 @@ export const DBT_CLOUD = 'dbt-cloud';
export const DBT_CLOUD_URN = `urn:li:dataPlatform:dbt`;
export const VERTICA = 'vertica';
export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`;
+export const FIVETRAN = 'fivetran';
+export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`;
export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
@@ -138,6 +141,7 @@ export const PLATFORM_URN_TO_LOGO = {
[SUPERSET_URN]: supersetLogo,
[UNITY_CATALOG_URN]: databricksLogo,
[VERTICA_URN]: verticaLogo,
+ [FIVETRAN_URN]: fivetranLogo,
};
export const SOURCE_TO_PLATFORM_URN = {
diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json
index b18384909c33f..9619abebbd54e 100644
--- a/datahub-web-react/src/app/ingest/source/builder/sources.json
+++ b/datahub-web-react/src/app/ingest/source/builder/sources.json
@@ -216,6 +216,13 @@
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/vertica/",
"recipe": "source:\n type: vertica\n config:\n # Coordinates\n host_port: localhost:5433\n # The name of the vertica database\n database: Database_Name\n # Credentials\n username: Vertica_User\n password: Vertica_Password\n\n include_tables: true\n include_views: true\n include_projections: true\n include_models: true\n include_view_lineage: true\n include_projection_lineage: true\n profiling:\n enabled: false\n stateful_ingestion:\n enabled: true "
},
+ {
+ "urn": "urn:li:dataPlatform:fivetran",
+ "name": "fivetran",
+ "displayName": "Fivetran",
+ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/",
+ "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV"
+ },
{
"urn": "urn:li:dataPlatform:custom",
"name": "custom",
diff --git a/datahub-web-react/src/images/fivetranlogo.png b/datahub-web-react/src/images/fivetranlogo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d5c999ad2d86e99273971dd0d31a18fd5e94733b
GIT binary patch
literal 10230
zcmZX42{@G9`@c1NmIh-dEmUN(?;)BZ%h*YT?6QWj#Sp!d7?E8eOZG^zXW!RES(+?a
zLUvgye&?BaU)T44{jY1T>ABCj&;7a2=U&cvO#kR=UuU4@q$MFCVYq?3h9V&$1CL}R
z)a2lwmfSO)BqZb{x>^S6KqT=#BJukFJ`avaJdQ{_|7ZO8`S{^@mCw8c~zvKOiu5ycXyamw<0D2ylt||NBkECHe!l
z29^Ugq7fhxM|}T7Bw9O0NB^h!zlgx#v4=!`LxLKSieqSC1kiYF3TP6sjvVPRQPIQ2%|G!g2CyAQJ)&M8RYl+K=
z;_)DO08{{dzyCDB2od_&2+=a}`f6X30106TQ6wxdgKb`_cz=tUQ
z?;!E{cosYmfsP-5DFV9H!xSk{0!Q8`ZG959;I7r*v$+~H&UK52B{zg(K5g%P#&WvU
z^IV^QbMi#&pLJcFu;t$#*iG9>dZ~lXgaz7VpZ3e16Qq}(Wy!gXG(O!-YNp!JzZ21F
z$W%r)edR5yoeEsumGRdL_p$E$tyBFEwaiOJ`6;SUPrj2!bKg@#c*i3%hF(7q+tsx2
zx2gHT)g;t5$GM7qCs})z;^FX+sBdwKk|`T#y+?j6wbC8|
z>Mwa!QybVdm#r{r0
z>HRg+$h6x!3ZwrF^cg+3%6Vn|AyB5gSA^lKKD9pyiD2%HYbpkQ<3H2)KP1{5cEos|
zk-NYks9EbYh++21dZs1B>0EhcOf}^a^N_IfYdcj`RQye3Wvqz#m{X_i%C$#DEnlqR
zyPtc*))=?^CKq;;UVdJ#`LkI%NxD?iEKlKUjK^2st5CR$wNw)RURTd#aJDqs1%yZ5v!Sl_YHI|q?Y
ze?+rl)_G^<@WIl
z9_83;Q?7ygm}*y1u;z*!auRU2yBsxU9x_ISI#|aw;3HtgL7KAUe=h%wMEqUaO!~H?
z7XKIBNeU)P?gS3X@jOOQZ6vK&AsuzZyBRm6vK@BVWxRyixqW0}WXVt`=LD|%bD^qE
ztgFa(hj4V-L`OtMhXt#sMQ{9i<<2ml-c@7WTSnIFf%8;glx>>8Wj(N_F|Ifg`4f6a
z%9e`PizPd)S4|t(b*G6Yi+F)WwD@V&eiKA`9XJU(i?xCUl4-u@FRO
z)NX26Bk?Ax1Je`zV+wXQ)IU1ame9ZljZGD2>rPsk$PU{D)$Vf=HlGOF+oI@Ci$Q$2
zq-ef;G;#{1v4y(;EF{d32StT?vBYrDeHav-;(JS1&mere#m|16qg*X!7QvxC@#-Vl
z(JSneItF1gEaUbQL~ig!UN4$#YEfRQ002k9x2Q;;BbvpmVAMeH_U4{Zyo0h`G{fM&JPo_7~i5am&`DlUKGZrU?)iM>~dapgy1K0zo<;zq=&sumUdr?$?WN`zs3ObQpn^E
zwFp351%>QRuAqU^z4KhObdM-w#(sj0imGVW&rv^j)3g4njn(K5!(ep_>^UNtgk8y
zVaOrn7PSZ{iKC~(DqG#dYvMbm=za?@ICPq*HJRV8iSs>IOKwv`BW8#@z4$1&=)Hit
zbGmlmV84X>VKW4NPr)|hy}+YoGbM)sP7UOzX+)K7)ru_V6{lFByX=HdUNMR?Jy+$|
z{HR%eg9$R~e^?qgD0Ri1Vi*genz7a9>83{exvx%KpFtd700nW6l6O-Y6w9bfs5qCE
zmUwV$T&Awc;Qov{RD*Mxpg(ZQ?D#?4c{liRz(VMT
zDw*p~;a(RII*0bts?(qSI+vmTG8Z_w9`(MwJed%YLp3PT+}8WyH>D{~<%m3LcH6|U
z0=;5uySnGk>xtQ@ERlxcPQ}lsHvyD;7tvAE&Tr}Z8ihN{Zn($Z7y2q>ZrD#qTjdHb
zC_c!S#t)1`ek6j}#q
z6|d09?VDV2Vqb^9
z3*Hf>eT?YG0xbfft|IQaC)4-aUr1Bv&%ayp?VjvP*=V
z5jh=k?Oc=Pz*r#pf*(}Wl*xA!bjGXB>@AOZKFv+Ch>okIxFxo>0}_em(7%8FZj`l;
ztoHapcx2if@NH(fp!@@u7-nov#<9>-p3{0i`>Ce=dcR4@5Rp21Ue5+1EqRYZ1%W-f
zM>^jJDn(vac-+u6zE`brs#g>cu-2vKWMTJ(7L+nalwPlB2iVoypXP)tkDkaqK?e}3
zbWPkV6&t}Bvm>__mD7X)o`C91a~*(=ZW_7!=A4%Z+nSD0uuTA=d>$9z3x4m!hP~R}E6Q7H;>RnK1
z*A)t+>`Pkyd3}0G$lZ=TN-ql62-@?A7o(dpHj&?QR-8QI$b*yi(SE@~JKhIYI#ee<
zMgt$Vgi^v~#7nb9+d9rVyjA!_KM53B>GN;N8rFSU)NY~eIpG>e`gufz>hy&r9~-S=
zXV`{YP?vn%(?+<;ci>4~ql-OlX(Q4uaViS;`hjHKBV=sp#?w&oXB|_j)7dQB
z#DjuaV!JSCn1>eV&YDN*Rh)TEkoYZa{t*y&fydNT-!5dv0xeC^j4iz@tIk?+
z22gP0ED~3GH@|?X;*1!K_Wa0gY#>9R@p;D^A#{)8oJ7?dPBaw8{f}nVf0ATkMqk2Q
ziT2Pl5_V`EM#?<5R!&8e?6y#v0fxf0@{;cZYyw4bo<1bqOXyW1X#Wy(j*31q*2D6~Hd#2MJsH_D5T>RQRhbeKG2
zeCAXX<7Z!PgU}dymNELy`_i?<)h2oI!N%kPn@fb%Akh+PYwFQj7hor|hAH~F!x3Qb
z`MNUU2Y+M1Ze6xKdE;mwg*ouyld(_4>=M-4v~+{+;z%NU#2;D|E?kI9qj^zX0g2w!_;}bGzk@^a?m+(F@du{GL{=jz}0KwX!~*3XoR32fwU4b`}<8v!D>Vb
z-}~90fQ#j=6BI9k?ko%6iYEaKsv(EI1R0Yrh!a_2#wX`7#QFJ76VUJr>0L_gNe?K9
zTws}5bU0-Si53~r_PHmUlm3;ADf$)uqaw3A(a*jHE+f4~Kyw2wKWs@FBZJ%$P`BBY
zMra_4&au)`#b%WK?ukfzYh^-UP{n33hl?s71E%KHgQ!#>vdgl1#BG4EkMhws$7j6o3)_xXur)Y74
zeEQigZ-bVlix`1oSd;48zUhhABP@%=WtG*=%>_~rl5(45EeZ%?L;=^x(b7}x$xW0L
z2~&au;S;FhP0t1tjw1ntWwB8klt7}FJuxowplSD@iK+bhGN?56_CGgpVJVR2{sXjq
zMZ-B}EhBl6i_U
zQsOsUCY6{W;1!l$G71OpBe2f^(m=Zfu71)9L@?71vswR^J}p-P3gHkEpL{MNaVwE1
zp+S@1hrjAnxdrhm^o`4AD)3($v*~3~IFpwIXgQ0i-nM_$RJ^6uv>3@X2%8C8F3>+o?Tu^+BA3yLU@ElIC>?>r}Cu^-3qA`Kcrc`4@OO6@$QXe}XC<%y69O
zwKoUSeN+(!G4$y@v)sEtMgS4&56JnyWtGqIw6Bi|sIg+a;bK
zF#)rMAiY&j^H7H4vI5u4$QM|6hnwY%JeIWZ$(w3oc3iw(Sl`z(pLhUO>2SeLm7l9T
z1<12P-{jjve%#Qn{1t7GePFasD?@7IW5>}`PtIDH@NSkb=3v>dJU$d_^lhE7
z@h>)_CQ4P~11QR^dP7x@BMFwaW^~gUrL+hF;9@g{+9KU2Q93P}vN_V{zr#i;?Q1diGjtx3Y!Z-a&Ltt%b17KrOJ`2SN
z+lameVK4)yIVj?&L2t+|0)bz0xz9YT7A-s6U4D5^?ke8LRo6a=4+s7@L>mp(TkVT
zOKBniP!@Dy&tpb51|$2@)Oby|EkBZC^b&yzMnGVG^mH*I-i^j=m;rPnWI^@n_u?)#
z)$U5Nn3AUPVvvXyx8=GY;#d6
z_iypf(UYEK2w=#%NI6O$9&d;tk7R>#Le{wcR|HP)MZ7&B
z3DC7c{5ioFUu;(TzCJx-gyCEG#;@)^CH4RZ^?^GWsws7(9=A;0G)S$3LfoQDK##lFf(hOcy)u1|X|L^Rk&)?s_Y_-SRn^UBRa7e$o>~
zgQDMSO1_I#VJ(DEDaeD%{|ek=V?=o8X@NkKD!cOMyb8h8>Ysa_K{JbXZlCQHYs$b5
zKoO#FTX12?j$+eYpPb2N$@Y`~z#t6n34ZrIP{J&A=?uF&x6s&3`Fj8)in)1r$C6eE
znxi>*YZax-j@0u$_q$u(Aa;ohq~5pB5Bti!8Q|U&)94)Rnj?9!x#C6uvI!TBh##PgkNa2hzlsI0
z8xX7wfbHR*v*Z2v!x+f=3v|38Y`81tSH~caZ+e(U%2ylQ@6ccJhg!jWy*-~AHVal9
z3)P1;{&yU%+(Z}allre{@F(<~kh-SendU`(e2NQaw$RO?Rf7MwM;}DM)P=_tAy7<@
zTnp8>E$=f8WO(dM`Kkso`Ao(XIKf%6tBTASgEJT$8};2avDv5!Gw`a$=iuu1IP
zt~z*s85;SF<2cu;C`Nzr<^`2gXlHuy23R$fpNSrbqW2CDi7>dX>uW&Ue8BccX5nhL
zse&!|lP}LOseRTnZB}
zOxA4$m1i4V0OYcs!oAzjjXwtz$qAh2>yVG%pa7>fa;DXkCYg@)5JM)kOe>6u=`-4a
zf^-6eA&6gnqipOmF}~CQQc3E=7J#g=Sf9O3EzT}&4#xvC4T89Os
z$nig^+E1&
zQzGe>7MXjh(zX$b&1&wEGbU#pYD_mGCr1Kkc}W6|w;Dn)^04a#e<_<%V`~qpF(Vrm
zPqQvEtm<+Xj&C*93Lmu|x-^{%S#zSWaHM~`EciUDW+rZOWPrKoP;xRX>tf$5Uc=83
zml3}^KRLo$9O&Otn*z>k+7}jlh0zOP($XJlW+;PqRy3<3r72+wwMwY;OVs$0>cWR-
zO#UiWMH*AyqjvM>n$DU3Fdz0_&Uc50^8Bp`k8c)46aC-Aj1R#pE5=9(493t2mHGkQ
zXv|b`uZ6`>H&x8+?&hSre-~%Gho3*QsggC?r5Pu)BYoIQaW}NeS|cn85~}(mgKv(0Jt8#oE+c
zT^2#1Ry#V=yyLV&4E0afpJlDBKB+GJaM6Uv9{eD3ZyC)som&&Qey3lt%=nCpt$Cy3
zO1qlV8Z}KDF4EV__@$7^--}g|wUqG}+yv{g;hVUJ>A7%0gIkmzbx?ubtE;ZzsZujx
z#(fXQCI2jqnN?ANn~kLU$=Jeojah$|D;z=iWAr`Nyq~{kzLUb=Ngr?-kaQ##!MCuh
zFem-aC%pBNlHZ7yE&F#~uHdw!<*;JFM7so#B4QcD^Q+)*?OVrf!X0T
zA9efPBCg36Uj+&sd|Y%hn@Lfty?+T5LtZw3A^o>Ru^;3)JLVCN9J?8TeVF#Z;H>X?%owb0Cw%k?Wv4
z0U=QaJ@jGvv}Opw&~Sc=en}!6dtv3bJ~%mPtEkk&6(Q@?mQOY=n%S?iB+=UDPba
z6l(*2m=*p;(7p&MoaUuSW_ZO05-?|$f^9BlikbePC|7!C0A(w68>*?L_cLLM%9!hj
z#Z&eXhWJ+f1Ab(M9EK79$#Slw1QZY5WQ!|VHw+Y1DGf$i#CJ!WqEII?KzF+?nN&FT
z=Fm`%*XmLX&0)8g#P+G}y6XHk^=R&NMA4PV-8g(p~>YADlW<)d-N5mK-*B
z!4*P~RLKKth+$S(p0gXhpzT=hI>k%DLjyF6{MD?6>Ir*#8XlnQeP(YGA``W@CGO0c
z{*Q-Uk~Jr1iF%YF26p(T*Q^VkIqlA9xz3~}Vx5i3USF^u4dyrb>r)l^kXERQ4S^EA
z;=Hg>((ta4vqd)Dij#8{#aGC=)l_)yt(6o3(=i#p?0A{_GI*Xi74D4ks&<51iT1F#3OS$=_f$=
z%_&ExXtmos+8IFebmZr&J6C-@ADgYHMww^jjh1|LKUQJ^-JEKR|GQoiAl=J6ElZ>bBMh_4JlKWCrjg>`T|1(o=*8Z$M=mviT=iQ)$+
zPt#9ab!7b!T+jV)~QRcA-sgsjIq>5mpq;0@hK=1^1qQJ|iUg
zkaMk!%(>vlt-o_(<2T9Jm;JbZP(KnQAv?SHr2q9%-*NUB-s1;-X!M=`DbRMxrLMDsZICgSOCC^&OR
zr|^eZQ^_hxiz1e^&vWc!7m5(uzg`ZgxjWaGyekHc{qzI_W#I>g-8b(;Fy_PR1#i?V
zT`5Pkc*1|V=UW-|odx{=U;)unucUP@m!9GkT@Zk*ISIX4Y?>V5V=KZ_uHJnbf{9Gf
zDA0fZJ#6>Z>XJEiQIoJdc0SzC!Lf*?7oBgC_P)wCib;EnS>k-rUKrFgVPnCmwKW+L
z6S(qM4%#LiP}9S@Y>BtPVvK#nCe;FXH&e~zRcEwA)j!Xb941)pfJVy8s#^nfnFn$k
zP39@JHK#Y$7f(7chUG>~s;#g{{4(EtQ`dIZCY(1s;LGLjyw=8+q3ZjIK9~gWv9+nA
zfdI;{YTr?|5pzcz^{Q64DrLvjw(`_g1$-=;sMZ=Lp_MhwikBFwBfpG11w~Ov()MPd
z#bd_ji(;#2!?Uedf7%%5i_4+{7;7TG7)Cz*x@WilWcO9xjmp&7`p7TuWK&t+RMw27
zDqb(S?=;8i!&p{`?O~u0e!$MHR;-k}f7Hlp{h4+~r%93bdg15u^EWExhsV1_R={Y3
zuN04YH}?yB`G?MqZ$Ei7J<~f|q~z3vQqZb" to role fivetran_datahub;
+
+// Grant access to view database and schema in which your log and metadata tables exist
+grant usage on DATABASE "<fivetran-log-database>" to role fivetran_datahub;
+grant usage on SCHEMA "<fivetran-log-database>"."<fivetran-log-schema>" to role fivetran_datahub;
+
+// Grant access to execute select query on schema in which your log and metadata tables exist
+grant select on all tables in SCHEMA "<fivetran-log-database>"."<fivetran-log-schema>" to role fivetran_datahub;
+
+// Grant the fivetran_datahub to the snowflake user.
+grant role fivetran_datahub to user snowflake_user;
+```
+
+## Advanced Configurations
+
+### Working with Platform Instances
+If you have multiple instances of source or destination systems referenced in your `fivetran` setup, you need to configure a platform instance for each of these systems in the `fivetran` recipe to generate correct lineage edges. Refer to [Working with Platform Instances](https://datahubproject.io/docs/platform-instances) to learn more.
+
+When configuring a platform instance for a source system, use the connector id as the key; for a destination system, use the destination id as the key.
+
+#### Example - Multiple Postgres Source Connectors each reading from different postgres instance
+```yml
+ # Map of connector source to platform instance
+ sources_to_platform_instance:
+ postgres_connector_id1:
+ platform_instance: cloud_postgres_instance
+ env: PROD
+
+ postgres_connector_id2:
+ platform_instance: local_postgres_instance
+ env: DEV
+```
+
+#### Example - Multiple Snowflake Destinations each writing to different snowflake instance
+```yml
+ # Map of destination to platform instance
+ destination_to_platform_instance:
+ snowflake_destination_id1:
+ platform_instance: prod_snowflake_instance
+ env: PROD
+
+ snowflake_destination_id2:
+ platform_instance: dev_snowflake_instance
+ env: PROD
+```
+
+
+
diff --git a/metadata-ingestion/docs/sources/fivetran/fivetran_recipe.yml b/metadata-ingestion/docs/sources/fivetran/fivetran_recipe.yml
new file mode 100644
index 0000000000000..7c654df59723c
--- /dev/null
+++ b/metadata-ingestion/docs/sources/fivetran/fivetran_recipe.yml
@@ -0,0 +1,43 @@
+source:
+ type: fivetran
+ config:
+ # Fivetran log connector destination server configurations
+ fivetran_log_config:
+ destination_platform: snowflake
+ destination_config:
+ # Coordinates
+ account_id: "abc48144"
+ warehouse: "COMPUTE_WH"
+ database: "MY_SNOWFLAKE_DB"
+ log_schema: "FIVETRAN_LOG"
+
+ # Credentials
+ username: "${SNOWFLAKE_USER}"
+ password: "${SNOWFLAKE_PASS}"
+ role: "snowflake_role"
+
+ # Optional - filter for certain connector names instead of ingesting everything.
+ # connector_patterns:
+ # allow:
+ # - connector_name
+
+ # Optional -- A mapping from connector id to the database of that connector's sources.
+ # sources_to_database:
+ # connector_id: source_db
+
+ # Optional -- This mapping is optional and only required to configure platform-instance for source
+ # A mapping of Fivetran connector id to data platform instance
+ # sources_to_platform_instance:
+ # connector_id:
+ # platform_instance: cloud_instance
+ # env: DEV
+
+ # Optional -- This mapping is optional and only required to configure platform-instance for destination.
+ # A mapping of Fivetran destination id to data platform instance
+ # destination_to_platform_instance:
+ # destination_id:
+ # platform_instance: cloud_instance
+ # env: DEV
+
+sink:
+ # sink configs
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index afce8dcee840b..2392fce058061 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -395,6 +395,7 @@
"powerbi-report-server": powerbi_report_server,
"vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.1"},
"unity-catalog": databricks | sqllineage_lib,
+ "fivetran": snowflake_common,
}
# This is mainly used to exclude plugins from the Docker image.
@@ -525,6 +526,7 @@
"nifi",
"vertica",
"mode",
+ "fivetran",
"kafka-connect",
]
if plugin
@@ -629,6 +631,7 @@
"unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource",
"gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource",
"sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource",
+ "fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource",
],
"datahub.ingestion.transformer.plugins": [
"simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership",
diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
index 0face6415bacc..6c42e830e223b 100644
--- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
+++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py
@@ -100,7 +100,9 @@ def generate_tags_aspect(self) -> Iterable[GlobalTagsClass]:
)
return [tags]
- def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
+ def generate_mcp(
+ self, materialize_iolets: bool = True
+ ) -> Iterable[MetadataChangeProposalWrapper]:
mcp = MetadataChangeProposalWrapper(
entityUrn=str(self.urn),
aspect=DataJobInfoClass(
@@ -113,7 +115,9 @@ def generate_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
)
yield mcp
- yield from self.generate_data_input_output_mcp()
+ yield from self.generate_data_input_output_mcp(
+ materialize_iolets=materialize_iolets
+ )
for owner in self.generate_ownership_aspect():
mcp = MetadataChangeProposalWrapper(
@@ -144,7 +148,9 @@ def emit(
for mcp in self.generate_mcp():
emitter.emit(mcp, callback)
- def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
+ def generate_data_input_output_mcp(
+ self, materialize_iolets: bool
+ ) -> Iterable[MetadataChangeProposalWrapper]:
mcp = MetadataChangeProposalWrapper(
entityUrn=str(self.urn),
aspect=DataJobInputOutputClass(
@@ -157,10 +163,9 @@ def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapp
yield mcp
# Force entity materialization
- for iolet in self.inlets + self.outlets:
- mcp = MetadataChangeProposalWrapper(
- entityUrn=str(iolet),
- aspect=StatusClass(removed=False),
- )
-
- yield mcp
+ if materialize_iolets:
+ for iolet in self.inlets + self.outlets:
+ yield MetadataChangeProposalWrapper(
+ entityUrn=str(iolet),
+ aspect=StatusClass(removed=False),
+ )
diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
index cf6080c7072e6..2f07e4a112f93 100644
--- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
+++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py
@@ -220,12 +220,10 @@ def emit_process_end(
self._emit_mcp(mcp, emitter, callback)
def generate_mcp(
- self, created_ts_millis: Optional[int] = None
+ self, created_ts_millis: Optional[int] = None, materialize_iolets: bool = True
) -> Iterable[MetadataChangeProposalWrapper]:
- """
- Generates mcps from the object
- :rtype: Iterable[MetadataChangeProposalWrapper]
- """
+ """Generates mcps from the object"""
+
mcp = MetadataChangeProposalWrapper(
entityUrn=str(self.urn),
aspect=DataProcessInstanceProperties(
@@ -253,7 +251,7 @@ def generate_mcp(
)
yield mcp
- yield from self.generate_inlet_outlet_mcp()
+ yield from self.generate_inlet_outlet_mcp(materialize_iolets=materialize_iolets)
@staticmethod
def _emit_mcp(
@@ -329,7 +327,9 @@ def from_dataflow(dataflow: DataFlow, id: str) -> "DataProcessInstance":
dpi._template_object = dataflow
return dpi
- def generate_inlet_outlet_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
+ def generate_inlet_outlet_mcp(
+ self, materialize_iolets: bool
+ ) -> Iterable[MetadataChangeProposalWrapper]:
if self.inlets:
mcp = MetadataChangeProposalWrapper(
entityUrn=str(self.urn),
@@ -349,10 +349,9 @@ def generate_inlet_outlet_mcp(self) -> Iterable[MetadataChangeProposalWrapper]:
yield mcp
# Force entity materialization
- for iolet in self.inlets + self.outlets:
- mcp = MetadataChangeProposalWrapper(
- entityUrn=str(iolet),
- aspect=StatusClass(removed=False),
- )
-
- yield mcp
+ if materialize_iolets:
+ for iolet in self.inlets + self.outlets:
+ yield MetadataChangeProposalWrapper(
+ entityUrn=str(iolet),
+ aspect=StatusClass(removed=False),
+ )
diff --git a/metadata-ingestion/src/datahub/emitter/mcp.py b/metadata-ingestion/src/datahub/emitter/mcp.py
index 9085ac152ea0b..d6aa695665e4e 100644
--- a/metadata-ingestion/src/datahub/emitter/mcp.py
+++ b/metadata-ingestion/src/datahub/emitter/mcp.py
@@ -240,7 +240,7 @@ def from_obj_require_wrapper(
return mcp
def as_workunit(
- self, *, treat_errors_as_warnings: bool = False
+ self, *, treat_errors_as_warnings: bool = False, is_primary_source: bool = True
) -> "MetadataWorkUnit":
from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -254,10 +254,12 @@ def as_workunit(
id=f"{self.entityUrn}-{self.aspectName}-{ts}",
mcp=self,
treat_errors_as_warnings=treat_errors_as_warnings,
+ is_primary_source=is_primary_source,
)
return MetadataWorkUnit(
id=f"{self.entityUrn}-{self.aspectName}",
mcp=self,
treat_errors_as_warnings=treat_errors_as_warnings,
+ is_primary_source=is_primary_source,
)
diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
index 2ce9e07bc57bc..fae260226195c 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
@@ -17,6 +17,7 @@
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.emitter.mce_builder import make_dataplatform_instance_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.mcp_builder import entity_supports_aspect
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.schema_classes import (
BrowsePathEntryClass,
@@ -64,9 +65,9 @@ def auto_status_aspect(
"""
For all entities that don't have a status aspect, add one with removed set to false.
"""
-
all_urns: Set[str] = set()
status_urns: Set[str] = set()
+ skip_urns: Set[str] = set()
for wu in stream:
urn = wu.get_urn()
all_urns.add(urn)
@@ -89,9 +90,17 @@ def auto_status_aspect(
else:
raise ValueError(f"Unexpected type {type(wu.metadata)}")
+ if not isinstance(
+ wu.metadata, MetadataChangeEventClass
+ ) and not entity_supports_aspect(wu.metadata.entityType, StatusClass):
+ # Skip adding a status aspect to any entity whose type does not support it.
+ # For example, dataProcessInstance doesn't support the status aspect.
+ # Without this skip, ingestion fails with: java.lang.RuntimeException: Unknown aspect status for entity dataProcessInstance
+ skip_urns.add(urn)
+
yield wu
- for urn in sorted(all_urns - status_urns):
+ for urn in sorted(all_urns - status_urns - skip_urns):
yield MetadataChangeProposalWrapper(
entityUrn=urn,
aspect=StatusClass(removed=False),
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
new file mode 100644
index 0000000000000..b0843182c5cac
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py
@@ -0,0 +1,145 @@
+import logging
+from dataclasses import dataclass, field as dataclass_field
+from typing import Dict, List, Optional
+
+import pydantic
+from pydantic import Field, root_validator
+
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+ StaleEntityRemovalSourceReport,
+ StatefulStaleMetadataRemovalConfig,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+ StatefulIngestionConfigBase,
+)
+from datahub.ingestion.source_config.sql.snowflake import BaseSnowflakeConfig
+
+logger = logging.getLogger(__name__)
+
+
+class Constant:
+ """
+ keys used in fivetran plugin
+ """
+
+ ORCHESTRATOR = "fivetran"
+ # table column name
+ SOURCE_SCHEMA_NAME = "source_schema_name"
+ SOURCE_TABLE_NAME = "source_table_name"
+ SOURCE_TABLE_ID = "source_table_id"
+ SOURCE_COLUMN_NAME = "source_column_name"
+ DESTINATION_SCHEMA_NAME = "destination_schema_name"
+ DESTINATION_TABLE_NAME = "destination_table_name"
+ DESTINATION_TABLE_ID = "destination_table_id"
+ DESTINATION_COLUMN_NAME = "destination_column_name"
+ SYNC_ID = "sync_id"
+ MESSAGE_DATA = "message_data"
+ TIME_STAMP = "time_stamp"
+ STATUS = "status"
+ USER_ID = "user_id"
+ GIVEN_NAME = "given_name"
+ FAMILY_NAME = "family_name"
+ CONNECTOR_ID = "connector_id"
+ CONNECTOR_NAME = "connector_name"
+ CONNECTOR_TYPE_ID = "connector_type_id"
+ PAUSED = "paused"
+ SYNC_FREQUENCY = "sync_frequency"
+ DESTINATION_ID = "destination_id"
+ CONNECTING_USER_ID = "connecting_user_id"
+ # Job status constants
+ SUCCESSFUL = "SUCCESSFUL"
+ FAILURE_WITH_TASK = "FAILURE_WITH_TASK"
+ CANCELED = "CANCELED"
+
+
+KNOWN_DATA_PLATFORM_MAPPING = {
+ "postgres": "postgres",
+ "snowflake": "snowflake",
+}
+
+
+class DestinationConfig(BaseSnowflakeConfig):
+ database: str = Field(description="The fivetran connector log database.")
+ log_schema: str = Field(description="The fivetran connector log schema.")
+
+
+class FivetranLogConfig(ConfigModel):
+    destination_platform: str = pydantic.Field(
+        default="snowflake",
+        description="The destination platform where fivetran connector log tables are dumped.",
+    )
+    destination_config: Optional[DestinationConfig] = pydantic.Field(
+        default=None,
+        description="If destination platform is 'snowflake', provide snowflake configuration.",
+    )
+
+    @root_validator(pre=True)
+    def validate_destination_platfrom_and_config(cls, values: Dict) -> Dict:
+        # Pre-validators see the raw input before field defaults are applied, so fall back explicitly.
+        destination_platform = values.get("destination_platform", "snowflake")
+        if destination_platform != "snowflake":
+            raise ValueError(
+                f"Destination platform '{destination_platform}' is not yet supported."
+            )
+        if values.get("destination_config") is None:
+            raise ValueError(
+                "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe."
+            )
+        return values
+
+
+@dataclass
+class FivetranSourceReport(StaleEntityRemovalSourceReport):
+ connectors_scanned: int = 0
+ filtered_connectors: List[str] = dataclass_field(default_factory=list)
+
+ def report_connectors_scanned(self, count: int = 1) -> None:
+ self.connectors_scanned += count
+
+ def report_connectors_dropped(self, model: str) -> None:
+ self.filtered_connectors.append(model)
+
+
+class PlatformDetail(ConfigModel):
+ platform_instance: Optional[str] = pydantic.Field(
+ default=None,
+ description="The instance of the platform that all assets produced by this recipe belong to",
+ )
+ env: str = pydantic.Field(
+ default=DEFAULT_ENV,
+ description="The environment that all assets produced by DataHub platform ingestion source belong to",
+ )
+
+
+class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
+    fivetran_log_config: FivetranLogConfig = pydantic.Field(
+        description="Fivetran log connector destination server configurations.",
+    )
+    connector_patterns: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description="Regex patterns for connectors to filter in ingestion.",
+    )
+    include_column_lineage: bool = Field(
+        default=True,
+        description="Populates table->table column lineage.",
+    )
+    sources_to_database: Dict[str, str] = pydantic.Field(
+        default={},
+        description="A mapping of the connector's all sources to its database. Use connector id as key.",
+    )
+    # Configuration for stateful ingestion (optional; unset by default)
+    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field(
+        default=None, description="Fivetran Stateful Ingestion Config."
+    )
+    # Mapping from connector id to the platform instance of that connector's sources
+    sources_to_platform_instance: Dict[str, PlatformDetail] = pydantic.Field(
+        default={},
+        description="A mapping of the connector's all sources dataset to platform instance. Use connector id as key.",
+    )
+    # Mapping from destination id to the platform instance of that destination
+    destination_to_platform_instance: Dict[str, PlatformDetail] = pydantic.Field(
+        default={},
+        description="A mapping of destination dataset to platform instance. Use destination id as key.",
+    )
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/data_classes.py
new file mode 100644
index 0000000000000..82bb5f3467c2a
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/data_classes.py
@@ -0,0 +1,36 @@
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass
+class ColumnLineage:
+ source_column: str
+ destination_column: str
+
+
+@dataclass
+class TableLineage:
+ source_table: str
+ destination_table: str
+ column_lineage: List[ColumnLineage]
+
+
+@dataclass
+class Connector:
+ connector_id: str
+ connector_name: str
+ connector_type: str
+ paused: bool
+ sync_frequency: int
+ destination_id: str
+ user_name: str
+ table_lineage: List[TableLineage]
+ jobs: List["Job"]
+
+
+@dataclass
+class Job:
+ job_id: str
+ start_time: int
+ end_time: int
+ status: str
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
new file mode 100644
index 0000000000000..c0395b4e4e796
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py
@@ -0,0 +1,331 @@
+import logging
+from typing import Dict, Iterable, List, Optional
+
+import datahub.emitter.mce_builder as builder
+from datahub.api.entities.datajob import DataFlow, DataJob
+from datahub.api.entities.dataprocess.dataprocess_instance import (
+    DataProcessInstance,
+    InstanceRunResult,
+)
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SourceCapability,
+    SupportStatus,
+    capability,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.fivetran.config import (
+    KNOWN_DATA_PLATFORM_MAPPING,
+    Constant,
+    FivetranSourceConfig,
+    FivetranSourceReport,
+    PlatformDetail,
+)
+from datahub.ingestion.source.fivetran.data_classes import Connector, Job
+from datahub.ingestion.source.fivetran.fivetran_log_api import FivetranLogAPI
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionSourceBase,
+)
+from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
+    FineGrainedLineage,
+    FineGrainedLineageDownstreamType,
+    FineGrainedLineageUpstreamType,
+)
+from datahub.metadata.schema_classes import StatusClass
+from datahub.utilities.urns.data_flow_urn import DataFlowUrn
+from datahub.utilities.urns.dataset_urn import DatasetUrn
+
+# Logger instance
+logger = logging.getLogger(__name__)
+
+
+@platform_name("Fivetran")
+@config_class(FivetranSourceConfig)
+@support_status(SupportStatus.INCUBATING)
+@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default, can be disabled via configuration `include_column_lineage`",
+)
+class FivetranSource(StatefulIngestionSourceBase):
+    """
+    This plugin extracts fivetran users, connectors, destinations and sync history.
+    This plugin is in beta and has only been tested on Snowflake connector.
+    """
+
+    config: FivetranSourceConfig
+    report: FivetranSourceReport
+    platform: str = "fivetran"
+
+    def __init__(self, config: FivetranSourceConfig, ctx: PipelineContext):
+        """Create the report, the Fivetran log reader and the stale-entity handler."""
+        super().__init__(config, ctx)
+        self.config = config
+        self.report = FivetranSourceReport()
+
+        # All connector, lineage and sync metadata is read through this object.
+        self.audit_log = FivetranLogAPI(self.config.fivetran_log_config)
+
+        # Create and register the stateful ingestion use-case handler.
+        self.stale_entity_removal_handler = StaleEntityRemovalHandler.create(
+            self, self.config, self.ctx
+        )
+
+    def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None:
+        """Add the connector's table and column lineage to ``datajob`` in place.
+
+        Each entry of ``connector.table_lineage`` yields one input dataset urn
+        and, when the Fivetran log database is known, one output dataset urn.
+        Column-level edges are added when ``include_column_lineage`` is set.
+        """
+        # Per-connector / per-destination settings; a default PlatformDetail is
+        # used when nothing is configured for the id.
+        source_detail: PlatformDetail = self.config.sources_to_platform_instance.get(
+            connector.connector_id, PlatformDetail()
+        )
+        destination_detail: PlatformDetail = (
+            self.config.destination_to_platform_instance.get(
+                connector.destination_id, PlatformDetail()
+            )
+        )
+
+        # Get database for connector source
+        # TODO: Once Fivetran exposes this, we shouldn't ask for it via config.
+        source_database: Optional[str] = self.config.sources_to_database.get(
+            connector.connector_id
+        )
+
+        if connector.connector_type not in KNOWN_DATA_PLATFORM_MAPPING:
+            # Unmapped connector types are used verbatim as the platform id.
+            source_platform = connector.connector_type
+            logger.info(
+                f"Fivetran connector source type: {connector.connector_type} is not supported to mapped with Datahub dataset entity."
+            )
+        else:
+            source_platform = KNOWN_DATA_PLATFORM_MAPPING[connector.connector_type]
+
+        inlets: List[DatasetUrn] = []
+        outlets: List[DatasetUrn] = []
+        column_edges: List[FineGrainedLineage] = []
+
+        for table_lineage in connector.table_lineage:
+            # The source table is prefixed with the configured database, if any.
+            if source_database:
+                source_table_name = (
+                    f"{source_database.lower()}.{table_lineage.source_table}"
+                )
+            else:
+                source_table_name = table_lineage.source_table
+            input_dataset_urn = DatasetUrn.create_from_ids(
+                platform_id=source_platform,
+                table_name=source_table_name,
+                env=source_detail.env,
+                platform_instance=source_detail.platform_instance,
+            )
+            inlets.append(input_dataset_urn)
+
+            # Without a log database no destination dataset urn is built.
+            output_dataset_urn: Optional[DatasetUrn] = None
+            if self.audit_log.fivetran_log_database:
+                log_database = self.audit_log.fivetran_log_database.lower()
+                output_dataset_urn = DatasetUrn.create_from_ids(
+                    platform_id=self.config.fivetran_log_config.destination_platform,
+                    table_name=f"{log_database}.{table_lineage.destination_table}",
+                    env=destination_detail.env,
+                    platform_instance=destination_detail.platform_instance,
+                )
+                outlets.append(output_dataset_urn)
+
+            if not self.config.include_column_lineage:
+                continue
+
+            for column_lineage in table_lineage.column_lineage:
+                upstream_fields: List[str] = []
+                if input_dataset_urn:
+                    upstream_fields.append(
+                        builder.make_schema_field_urn(
+                            str(input_dataset_urn), column_lineage.source_column
+                        )
+                    )
+                # Stays empty when no output dataset urn was built above.
+                downstream_fields: List[str] = []
+                if output_dataset_urn:
+                    downstream_fields.append(
+                        builder.make_schema_field_urn(
+                            str(output_dataset_urn),
+                            column_lineage.destination_column,
+                        )
+                    )
+                column_edges.append(
+                    FineGrainedLineage(
+                        upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
+                        upstreams=upstream_fields,
+                        downstreamType=FineGrainedLineageDownstreamType.FIELD,
+                        downstreams=downstream_fields,
+                    )
+                )
+
+        datajob.inlets.extend(inlets)
+        datajob.outlets.extend(outlets)
+        datajob.fine_grained_lineages.extend(column_edges)
+
+    def _generate_dataflow_from_connector(self, connector: Connector) -> DataFlow:
+        """Build the DataFlow that represents ``connector``."""
+        return DataFlow(
+            id=connector.connector_id,
+            name=connector.connector_name,
+            orchestrator=Constant.ORCHESTRATOR,
+            env=self.config.env,
+            platform_instance=self.config.platform_instance,
+        )
+
+    def _generate_datajob_from_connector(self, connector: Connector) -> DataJob:
+        """Build the DataJob for ``connector``, including properties and lineage."""
+        datajob = DataJob(
+            id=connector.connector_id,
+            flow_urn=DataFlowUrn.create_from_ids(
+                orchestrator=Constant.ORCHESTRATOR,
+                flow_id=connector.connector_id,
+                env=self.config.env,
+                platform_instance=self.config.platform_instance,
+            ),
+            name=connector.connector_name,
+            owners={connector.user_name},
+        )
+
+        # Only these connector attributes become job properties; values are
+        # stored via repr(), and attributes that are missing or None are left out.
+        exposed_keys = (
+            Constant.PAUSED,
+            Constant.SYNC_FREQUENCY,
+            Constant.DESTINATION_ID,
+        )
+        candidate_values = (
+            (key, getattr(connector, key, None)) for key in exposed_keys
+        )
+        properties: Dict[str, str] = {
+            key: repr(value) for key, value in candidate_values if value is not None
+        }
+        datajob.properties = properties
+
+        # Map connector source and destination table with dataset entity
+        # Also extend the fine grained lineage of column if include_column_lineage is True
+        self._extend_lineage(connector=connector, datajob=datajob)
+
+        # TODO: Add fine grained lineages of dataset after FineGrainedLineageDownstreamType.DATASET enabled
+
+        return datajob
+
+    def _generate_dpi_from_job(self, job: Job, datajob: DataJob) -> DataProcessInstance:
+        """Build the DataProcessInstance for one sync ``job`` of ``datajob``."""
+        dpi = DataProcessInstance.from_datajob(
+            datajob=datajob,
+            id=job.job_id,
+            clone_inlets=True,
+            clone_outlets=True,
+        )
+        return dpi
+
+    def _get_dpi_workunits(
+        self, job: Job, dpi: DataProcessInstance
+    ) -> Iterable[MetadataWorkUnit]:
+        """Yield the workunits describing one sync run.
+
+        Nothing is yielded when ``job.status`` is not one of Constant.SUCCESSFUL,
+        Constant.FAILURE_WITH_TASK or Constant.CANCELED.
+        """
+        run_results: Dict[str, InstanceRunResult] = {
+            Constant.SUCCESSFUL: InstanceRunResult.SUCCESS,
+            Constant.FAILURE_WITH_TASK: InstanceRunResult.FAILURE,
+            Constant.CANCELED: InstanceRunResult.SKIPPED,
+        }
+        if job.status not in run_results:
+            logger.debug(
+                f"Status should be either SUCCESSFUL, FAILURE_WITH_TASK or CANCELED and it was "
+                f"{job.status}"
+            )
+            # This is a generator, so returning here simply ends the iteration.
+            return []
+
+        run_result = run_results[job.status]
+        # job.start_time / job.end_time are scaled by 1000 for the *_millis arguments.
+        started_at_millis = job.start_time * 1000
+        yield from (
+            mcp.as_workunit()
+            for mcp in dpi.generate_mcp(
+                created_ts_millis=started_at_millis, materialize_iolets=False
+            )
+        )
+        yield from (
+            mcp.as_workunit() for mcp in dpi.start_event_mcp(started_at_millis)
+        )
+        yield from (
+            mcp.as_workunit()
+            for mcp in dpi.end_event_mcp(
+                end_timestamp_millis=job.end_time * 1000,
+                result=run_result,
+                result_type=Constant.ORCHESTRATOR,
+            )
+        )
+
+    def _get_connector_workunits(
+        self, connector: Connector
+    ) -> Iterable[MetadataWorkUnit]:
+        """Yield the dataflow, datajob and sync-run workunits of ``connector``."""
+        self.report.report_connectors_scanned()
+
+        # Create dataflow entity with same name as connector name
+        dataflow = self._generate_dataflow_from_connector(connector)
+        yield from (mcp.as_workunit() for mcp in dataflow.generate_mcp())
+
+        # Map Fivetran's connector entity with Datahub's datajob entity
+        datajob = self._generate_datajob_from_connector(connector)
+        for mcp in datajob.generate_mcp(materialize_iolets=True):
+            is_dataset_status = mcp.entityType == "dataset" and isinstance(
+                mcp.aspect, StatusClass
+            )
+            if not is_dataset_status:
+                yield mcp.as_workunit()
+                continue
+            # While we "materialize" the referenced datasets, we don't want them
+            # to be tracked by stateful ingestion.
+            yield mcp.as_workunit(is_primary_source=False)
+
+        # Map Fivetran's job/sync history entity with Datahub's data process entity
+        for job in connector.jobs:
+            yield from self._get_dpi_workunits(
+                job, self._generate_dpi_from_job(job, datajob)
+            )
+
+    @classmethod
+    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
+        """Parse ``config_dict`` into a FivetranSourceConfig and build the source."""
+        return cls(FivetranSourceConfig.parse_obj(config_dict), ctx)
+
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        """Return the base processors followed by the stale-entity removal processor."""
+        processors = list(super().get_workunit_processors())
+        processors.append(self.stale_entity_removal_handler.workunit_processor)
+        return processors
+
+    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        """Yield workunits for every connector allowed by ``connector_patterns``."""
+        logger.info("Fivetran plugin execution is started")
+        for connector in self.audit_log.get_connectors_list():
+            if self.config.connector_patterns.allowed(connector.connector_name):
+                logger.info(f"Processing connector id: {connector.connector_id}")
+                yield from self._get_connector_workunits(connector)
+            else:
+                # Connectors rejected by the pattern are only recorded in the report.
+                self.report.report_connectors_dropped(connector.connector_name)
+
+    def get_report(self) -> SourceReport:
+        """Return the report object of this source."""
+        return self.report
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
new file mode 100644
index 0000000000000..d5d146559d918
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py
@@ -0,0 +1,164 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import create_engine
+
+from datahub.ingestion.source.fivetran.config import Constant, FivetranLogConfig
+from datahub.ingestion.source.fivetran.data_classes import (
+    ColumnLineage,
+    Connector,
+    Job,
+    TableLineage,
+)
+from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class FivetranLogAPI:
+    """Reads connectors, lineage and sync history from the Fivetran log tables."""
+
+    def __init__(self, fivetran_log_config: FivetranLogConfig) -> None:
+        """Store the config and open the engine for the log destination."""
+        # Must be initialised before the engine is built: building the engine
+        # overwrites it with the destination database name.
+        self.fivetran_log_database: Optional[str] = None
+        self.fivetran_log_config = fivetran_log_config
+        self.engine = self._get_log_destination_engine()
+
+    def _get_log_destination_engine(self) -> Any:
+        """Create the SQLAlchemy engine for the configured log destination.
+
+        Returns None unless the destination platform is "snowflake" and a
+        destination config is present. On success the database and log schema
+        are selected and ``fivetran_log_database`` is set.
+        """
+        # For every destination, create sqlalchemy engine,
+        # select the database and schema and set fivetran_log_database class variable
+        if self.fivetran_log_config.destination_platform != "snowflake":
+            return None
+        destination_config = self.fivetran_log_config.destination_config
+        if destination_config is None:
+            return None
+
+        url = destination_config.get_sql_alchemy_url()
+        engine = create_engine(url, **destination_config.get_options())
+        engine.execute(
+            FivetranLogQuery.use_schema(
+                destination_config.database,
+                destination_config.log_schema,
+            )
+        )
+        self.fivetran_log_database = destination_config.database
+        return engine
+
+    def _query(self, query: str) -> List[Dict]:
+        """Debug-log ``query``, run it on the engine and return all rows as a list."""
+        logger.debug(f"Query : {query}")
+        return list(self.engine.execute(query))
+
+    def _get_table_lineage(self, connector_id: str) -> List[TableLineage]:
+        """Return a connector's table lineage; runs one column query per table pair."""
+        lineage: List[TableLineage] = []
+        table_rows = self._query(
+            FivetranLogQuery.get_table_lineage_query(connector_id=connector_id)
+        )
+        for table_row in table_rows:
+            column_rows = self._query(
+                FivetranLogQuery.get_column_lineage_query(
+                    source_table_id=table_row[Constant.SOURCE_TABLE_ID],
+                    destination_table_id=table_row[Constant.DESTINATION_TABLE_ID],
+                )
+            )
+            columns: List[ColumnLineage] = []
+            for column_row in column_rows:
+                columns.append(
+                    ColumnLineage(
+                        source_column=column_row[Constant.SOURCE_COLUMN_NAME],
+                        destination_column=column_row[Constant.DESTINATION_COLUMN_NAME],
+                    )
+                )
+
+            # Table names are recorded as "<schema>.<table>".
+            source_table = f"{table_row[Constant.SOURCE_SCHEMA_NAME]}.{table_row[Constant.SOURCE_TABLE_NAME]}"
+            destination_table = f"{table_row[Constant.DESTINATION_SCHEMA_NAME]}.{table_row[Constant.DESTINATION_TABLE_NAME]}"
+            lineage.append(
+                TableLineage(
+                    source_table=source_table,
+                    destination_table=destination_table,
+                    column_lineage=columns,
+                )
+            )
+
+        return lineage
+
+    def _get_jobs_list(self, connector_id: str) -> List[Job]:
+        """Return the syncs of a connector that have both a start and an end log.
+
+        sync_start and sync_end log rows are paired by sync id; a sync without
+        a sync_end row is skipped.
+        """
+        start_rows = self._query(
+            FivetranLogQuery.get_sync_start_logs_query(connector_id=connector_id)
+        )
+        sync_start_logs = {row[Constant.SYNC_ID]: row for row in start_rows}
+        end_rows = self._query(
+            FivetranLogQuery.get_sync_end_logs_query(connector_id=connector_id)
+        )
+        sync_end_logs = {row[Constant.SYNC_ID]: row for row in end_rows}
+
+        jobs: List[Job] = []
+        for sync_id, start_row in sync_start_logs.items():
+            end_row = sync_end_logs.get(sync_id)
+            if end_row is None:
+                # If no sync-end event log for this sync id that means sync is still in progress
+                continue
+
+            message_data = json.loads(end_row[Constant.MESSAGE_DATA])
+            if isinstance(message_data, str):
+                # Sometimes message_data contains json string inside string
+                # Ex: '"{\"status\":\"SUCCESSFUL\"}"'
+                # Hence, need to do json loads twice.
+                message_data = json.loads(message_data)
+
+            jobs.append(
+                Job(
+                    job_id=sync_id,
+                    start_time=round(start_row[Constant.TIME_STAMP].timestamp()),
+                    end_time=round(end_row[Constant.TIME_STAMP].timestamp()),
+                    status=message_data[Constant.STATUS],
+                )
+            )
+        return jobs
+
+    def _get_user_name(self, user_id: str) -> str:
+        """Return "<given name> <family name>" for ``user_id``.
+
+        NOTE(review): the first result row is indexed unconditionally, so an id
+        without a matching user row raises IndexError.
+        """
+        matching_users = self._query(
+            FivetranLogQuery.get_user_query(user_id=user_id)
+        )
+        user = matching_users[0]
+        given_name = user[Constant.GIVEN_NAME]
+        family_name = user[Constant.FAMILY_NAME]
+        return f"{given_name} {family_name}"
+
+    def get_connectors_list(self) -> List[Connector]:
+        """Return every connector row with its user name, table lineage and jobs."""
+        return [
+            Connector(
+                connector_id=row[Constant.CONNECTOR_ID],
+                connector_name=row[Constant.CONNECTOR_NAME],
+                connector_type=row[Constant.CONNECTOR_TYPE_ID],
+                paused=row[Constant.PAUSED],
+                sync_frequency=row[Constant.SYNC_FREQUENCY],
+                destination_id=row[Constant.DESTINATION_ID],
+                user_name=self._get_user_name(row[Constant.CONNECTING_USER_ID]),
+                table_lineage=self._get_table_lineage(row[Constant.CONNECTOR_ID]),
+                jobs=self._get_jobs_list(row[Constant.CONNECTOR_ID]),
+            )
+            for row in self._query(FivetranLogQuery.get_connectors_query())
+        ]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
new file mode 100644
index 0000000000000..4f52fcd5d884f
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py
@@ -0,0 +1,93 @@
+class FivetranLogQuery:
+    """Builders for the SQL statements run against the Fivetran log tables."""
+
+    @staticmethod
+    def use_schema(db_name: str, schema_name: str) -> str:
+        """Return the ``use schema`` statement for the quoted database and schema."""
+        statement = f'use schema "{db_name}"."{schema_name}"'
+        return statement
+
+    @staticmethod
+    def get_connectors_query() -> str:
+        """Return the query selecting connectors where ``_fivetran_deleted`` is FALSE."""
+        query = """
+ SELECT connector_id as "CONNECTOR_ID",
+ connecting_user_id as "CONNECTING_USER_ID",
+ connector_type_id as "CONNECTOR_TYPE_ID",
+ connector_name as "CONNECTOR_NAME",
+ paused as "PAUSED",
+ sync_frequency as "SYNC_FREQUENCY",
+ destination_id as "DESTINATION_ID"
+ FROM CONNECTOR
+ WHERE _fivetran_deleted = FALSE"""
+        return query
+
+    @staticmethod
+    def get_user_query(user_id: str) -> str:
+        """Return the query selecting the USER row whose id is ``user_id``."""
+        query = f"""
+ SELECT id as "USER_ID",
+ given_name as "GIVEN_NAME",
+ family_name as "FAMILY_NAME"
+ FROM USER
+ WHERE id = '{user_id}'"""
+        return query
+
+    @staticmethod
+    def get_sync_start_logs_query(connector_id: str) -> str:
+        """Return the query for a connector's ``sync_start`` rows, ordered by time_stamp."""
+        query = f"""
+ SELECT time_stamp as "TIME_STAMP",
+ sync_id as "SYNC_ID"
+ FROM LOG
+ WHERE message_event = 'sync_start'
+ and connector_id = '{connector_id}' order by time_stamp"""
+        return query
+
+    @staticmethod
+    def get_sync_end_logs_query(connector_id: str) -> str:
+        """Return the query for a connector's ``sync_end`` rows, ordered by time_stamp."""
+        query = f"""
+ SELECT time_stamp as "TIME_STAMP",
+ sync_id as "SYNC_ID",
+ message_data as "MESSAGE_DATA"
+ FROM LOG
+ WHERE message_event = 'sync_end'
+ and connector_id = '{connector_id}' order by time_stamp"""
+        return query
+
+    @staticmethod
+    def get_table_lineage_query(connector_id: str) -> str:
+        """Return the query joining table lineage with table and schema names."""
+        query = f"""
+ SELECT stm.id as "SOURCE_TABLE_ID",
+ stm.name as "SOURCE_TABLE_NAME",
+ ssm.name as "SOURCE_SCHEMA_NAME",
+ dtm.id as "DESTINATION_TABLE_ID",
+ dtm.name as "DESTINATION_TABLE_NAME",
+ dsm.name as "DESTINATION_SCHEMA_NAME"
+ FROM table_lineage as tl
+ JOIN source_table_metadata as stm on tl.source_table_id = stm.id
+ JOIN destination_table_metadata as dtm on tl.destination_table_id = dtm.id
+ JOIN source_schema_metadata as ssm on stm.schema_id = ssm.id
+ JOIN destination_schema_metadata as dsm on dtm.schema_id = dsm.id
+ WHERE stm.connector_id = '{connector_id}'"""
+        return query
+
+    @staticmethod
+    def get_column_lineage_query(
+        source_table_id: str, destination_table_id: str
+    ) -> str:
+        """Return the query pairing source and destination column names.
+
+        Unlike the other builders, the two ids are interpolated without quotes.
+        """
+        query = f"""
+ SELECT scm.name as "SOURCE_COLUMN_NAME",
+ dcm.name as "DESTINATION_COLUMN_NAME"
+ FROM column_lineage as cl
+ JOIN source_column_metadata as scm on
+ (cl.source_column_id = scm.id and scm.table_id = {source_table_id})
+ JOIN destination_column_metadata as dcm on
+ (cl.destination_column_id = dcm.id and dcm.table_id = {destination_table_id})"""
+        return query
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
index c3e8c175f1de5..9fc697018ecd6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
@@ -12,7 +12,7 @@
OAUTH_AUTHENTICATOR,
)
-from datahub.configuration.common import AllowDenyPattern
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.oauth import OAuthConfiguration, OAuthIdentityProvider
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_rename import pydantic_renamed_field
@@ -42,9 +42,14 @@
SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com"
-class BaseSnowflakeConfig(BaseTimeWindowConfig):
+class BaseSnowflakeConfig(ConfigModel):
# Note: this config model is also used by the snowflake-usage source.
+ options: dict = pydantic.Field(
+ default_factory=dict,
+ description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.",
+ )
+
scheme: str = "snowflake"
username: Optional[str] = pydantic.Field(
default=None, description="Snowflake username."
@@ -82,14 +87,6 @@ class BaseSnowflakeConfig(BaseTimeWindowConfig):
default=None, description="Snowflake warehouse."
)
role: Optional[str] = pydantic.Field(default=None, description="Snowflake role.")
- include_table_lineage: bool = pydantic.Field(
- default=True,
- description="If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role and Snowflake Enterprise Edition or above.",
- )
- include_view_lineage: bool = pydantic.Field(
- default=True,
- description="If enabled, populates the snowflake view->table and table->view lineages. Requires appropriate grants given to the role, and include_table_lineage to be True. view->table lineage requires Snowflake Enterprise Edition or above.",
- )
connect_args: Optional[Dict[str, Any]] = pydantic.Field(
default=None,
description="Connect args to pass to Snowflake SqlAlchemy driver",
@@ -166,18 +163,6 @@ def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None:
"but should be set when using use_certificate false for oauth_config"
)
- @pydantic.root_validator()
- def validate_include_view_lineage(cls, values):
- if (
- "include_table_lineage" in values
- and not values.get("include_table_lineage")
- and values.get("include_view_lineage")
- ):
- raise ValueError(
- "include_table_lineage must be True for include_view_lineage to be set."
- )
- return values
-
def get_sql_alchemy_url(
self,
database: Optional[str] = None,
@@ -261,28 +246,8 @@ def get_connect_args(self) -> dict:
self._computed_connect_args = connect_args
return connect_args
-
-class SnowflakeConfig(BaseSnowflakeConfig, SQLCommonConfig):
- database_pattern: AllowDenyPattern = AllowDenyPattern(
- deny=[r"^UTIL_DB$", r"^SNOWFLAKE$", r"^SNOWFLAKE_SAMPLE_DATA$"]
- )
-
- ignore_start_time_lineage: bool = False
- upstream_lineage_in_report: bool = False
-
- def get_sql_alchemy_url(
- self,
- database: Optional[str] = None,
- username: Optional[str] = None,
- password: Optional[pydantic.SecretStr] = None,
- role: Optional[str] = None,
- ) -> str:
- return super().get_sql_alchemy_url(
- database=database, username=username, password=password, role=role
- )
-
def get_options(self) -> dict:
- options_connect_args: Dict = super().get_connect_args()
+ options_connect_args: Dict = self.get_connect_args()
options_connect_args.update(self.options.get("connect_args", {}))
self.options["connect_args"] = options_connect_args
return self.options
@@ -372,3 +337,34 @@ def get_connection(self) -> snowflake.connector.SnowflakeConnection:
else:
# not expected to be here
raise Exception("Not expected to be here.")
+
+
+class SnowflakeConfig(BaseSnowflakeConfig, BaseTimeWindowConfig, SQLCommonConfig):
+    # Adds the lineage switches, a default database deny-list and their validator.
+    include_table_lineage: bool = pydantic.Field(
+        default=True,
+        description="If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role and Snowflake Enterprise Edition or above.",
+    )
+    include_view_lineage: bool = pydantic.Field(
+        default=True,
+        description="If enabled, populates the snowflake view->table and table->view lineages. Requires appropriate grants given to the role, and include_table_lineage to be True. view->table lineage requires Snowflake Enterprise Edition or above.",
+    )
+
+    database_pattern: AllowDenyPattern = AllowDenyPattern(
+        deny=[r"^UTIL_DB$", r"^SNOWFLAKE$", r"^SNOWFLAKE_SAMPLE_DATA$"]
+    )
+
+    ignore_start_time_lineage: bool = False
+    upstream_lineage_in_report: bool = False
+
+    @pydantic.root_validator()
+    def validate_include_view_lineage(cls, values):
+        # include_view_lineage may only be enabled while include_table_lineage is.
+        table_lineage_off = (
+            "include_table_lineage" in values
+            and not values["include_table_lineage"]
+        )
+        if table_lineage_off and values.get("include_view_lineage"):
+            message = "include_table_lineage must be True for include_view_lineage to be set."
+            raise ValueError(message)
+        return values
diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
new file mode 100644
index 0000000000000..a72c960a72296
--- /dev/null
+++ b/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json
@@ -0,0 +1,658 @@
+[
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "dataFlowInfo",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "name": "postgres"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "ownership",
+ "aspect": {
+ "json": {
+ "owners": [],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:fivetran"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "globalTags",
+ "aspect": {
+ "json": {
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "dataJobInfo",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "paused": "False",
+ "sync_frequency": "1440",
+ "destination_id": "'interval_unconstitutional'"
+ },
+ "name": "postgres",
+ "type": {
+ "string": "COMMAND"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "dataJobInputOutput",
+ "aspect": {
+ "json": {
+ "inputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)"
+ ],
+ "outputDatasets": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)"
+ ],
+ "inputDatajobs": [],
+ "fineGrainedLineages": [
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),name)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD),name)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),id)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD),id)"
+ ],
+ "confidenceScore": 1.0
+ },
+ {
+ "upstreamType": "FIELD_SET",
+ "upstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),name)"
+ ],
+ "downstreamType": "FIELD",
+ "downstreams": [
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD),name)"
+ ],
+ "confidenceScore": 1.0
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataset",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "ownership",
+ "aspect": {
+ "json": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:Shubham Jagtap",
+ "type": "DEVELOPER",
+ "source": {
+ "type": "SERVICE"
+ }
+ }
+ ],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:fivetran"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "globalTags",
+ "aspect": {
+ "json": {
+ "tags": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "name": "4c9a03d6-eded-4422-a46a-163266e58243",
+ "type": "BATCH_SCHEDULED",
+ "created": {
+ "time": 1695191853000,
+ "actor": "urn:li:corpuser:datahub"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRelationships",
+ "aspect": {
+ "json": {
+ "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "upstreamInstances": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceInput",
+ "aspect": {
+ "json": {
+ "inputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceOutput",
+ "aspect": {
+ "json": {
+ "outputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1695191853000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "STARTED"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1695191885000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "COMPLETE",
+ "result": {
+ "type": "SUCCESS",
+ "nativeResultType": "fivetran"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "name": "f773d1e9-c791-48f4-894f-8cf9b3dfc834",
+ "type": "BATCH_SCHEDULED",
+ "created": {
+ "time": 1696343730000,
+ "actor": "urn:li:corpuser:datahub"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRelationships",
+ "aspect": {
+ "json": {
+ "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "upstreamInstances": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceInput",
+ "aspect": {
+ "json": {
+ "inputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceOutput",
+ "aspect": {
+ "json": {
+ "outputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1696343730000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "STARTED"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1696343732000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "COMPLETE",
+ "result": {
+ "type": "SKIPPED",
+ "nativeResultType": "fivetran"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {},
+ "name": "63c2fc85-600b-455f-9ba0-f576522465be",
+ "type": "BATCH_SCHEDULED",
+ "created": {
+ "time": 1696343755000,
+ "actor": "urn:li:corpuser:datahub"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRelationships",
+ "aspect": {
+ "json": {
+ "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "upstreamInstances": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceInput",
+ "aspect": {
+ "json": {
+ "inputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)",
+ "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceOutput",
+ "aspect": {
+ "json": {
+ "outputs": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)",
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1696343755000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "STARTED"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataProcessInstance",
+ "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d",
+ "changeType": "UPSERT",
+ "aspectName": "dataProcessInstanceRunEvent",
+ "aspect": {
+ "json": {
+ "timestampMillis": 1696343790000,
+ "partitionSpec": {
+ "type": "FULL_TABLE",
+ "partition": "FULL_TABLE_SNAPSHOT"
+ },
+ "status": "COMPLETE",
+ "result": {
+ "type": "FAILURE",
+ "nativeResultType": "fivetran"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataFlow",
+ "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dataJob",
+ "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1654621200000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+}
+]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py
new file mode 100644
index 0000000000000..62b3df12e1b9d
--- /dev/null
+++ b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py
@@ -0,0 +1,192 @@
+import datetime
+from unittest import mock
+from unittest.mock import MagicMock
+
+import pytest
+from freezegun import freeze_time
+
+from datahub.ingestion.run.pipeline import Pipeline
+from datahub.ingestion.source.fivetran.config import DestinationConfig
+from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery
+from tests.test_helpers import mce_helpers
+
+FROZEN_TIME = "2022-06-07 17:00:00"
+
+
+def default_query_results(query):
+ if query == FivetranLogQuery.use_schema("TEST_DATABASE", "TEST_SCHEMA"):
+ return []
+ elif query == FivetranLogQuery.get_connectors_query():
+ return [
+ {
+ "connector_id": "calendar_elected",
+ "connecting_user_id": "reapply_phone",
+ "connector_type_id": "postgres",
+ "connector_name": "postgres",
+ "paused": False,
+ "sync_frequency": 1440,
+ "destination_id": "interval_unconstitutional",
+ },
+ ]
+ elif query == FivetranLogQuery.get_table_lineage_query("calendar_elected"):
+ return [
+ {
+ "source_table_id": "10040",
+ "source_table_name": "employee",
+ "source_schema_name": "public",
+ "destination_table_id": "7779",
+ "destination_table_name": "employee",
+ "destination_schema_name": "postgres_public",
+ },
+ {
+ "source_table_id": "10041",
+ "source_table_name": "company",
+ "source_schema_name": "public",
+ "destination_table_id": "7780",
+ "destination_table_name": "company",
+ "destination_schema_name": "postgres_public",
+ },
+ ]
+ elif query == FivetranLogQuery.get_column_lineage_query(
+ "10040", "7779"
+ ) or query == FivetranLogQuery.get_column_lineage_query("10041", "7780"):
+ return [
+ {
+ "source_column_name": "id",
+ "destination_column_name": "id",
+ },
+ {
+ "source_column_name": "name",
+ "destination_column_name": "name",
+ },
+ ]
+ elif query == FivetranLogQuery.get_user_query("reapply_phone"):
+ return [
+ {
+ "user_id": "reapply_phone",
+ "given_name": "Shubham",
+ "family_name": "Jagtap",
+ }
+ ]
+ elif query == FivetranLogQuery.get_sync_start_logs_query("calendar_elected"):
+ return [
+ {
+ "time_stamp": datetime.datetime(2023, 9, 20, 6, 37, 32, 606000),
+ "sync_id": "4c9a03d6-eded-4422-a46a-163266e58243",
+ },
+ {
+ "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 30, 345000),
+ "sync_id": "f773d1e9-c791-48f4-894f-8cf9b3dfc834",
+ },
+ {
+ "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 55, 401000),
+ "sync_id": "63c2fc85-600b-455f-9ba0-f576522465be",
+ },
+ ]
+ elif query == FivetranLogQuery.get_sync_end_logs_query("calendar_elected"):
+ return [
+ {
+ "time_stamp": datetime.datetime(2023, 9, 20, 6, 38, 5, 56000),
+ "sync_id": "4c9a03d6-eded-4422-a46a-163266e58243",
+ "message_data": '"{\\"status\\":\\"SUCCESSFUL\\"}"',
+ },
+ {
+ "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 31, 512000),
+ "sync_id": "f773d1e9-c791-48f4-894f-8cf9b3dfc834",
+ "message_data": '"{\\"reason\\":\\"Sync has been cancelled because of a user action in the dashboard.Standard Config updated.\\",\\"status\\":\\"CANCELED\\"}"',
+ },
+ {
+ "time_stamp": datetime.datetime(2023, 10, 3, 14, 36, 29, 678000),
+ "sync_id": "63c2fc85-600b-455f-9ba0-f576522465be",
+ "message_data": '"{\\"reason\\":\\"java.lang.RuntimeException: FATAL: too many connections for role \\\\\\"hxwraqld\\\\\\"\\",\\"taskType\\":\\"reconnect\\",\\"status\\":\\"FAILURE_WITH_TASK\\"}"',
+ },
+ ]
+ # No canned result matched: fail loudly so an unexpected query is caught by the test
+ raise Exception(f"Unknown query {query}")
+
+
+@freeze_time(FROZEN_TIME)
+@pytest.mark.integration
+def test_fivetran_basic(pytestconfig, tmp_path):
+ test_resources_dir = pytestconfig.rootpath / "tests/integration/fivetran"
+
+ # Run the metadata ingestion pipeline.
+ output_file = tmp_path / "fivetran_test_events.json"
+ golden_file = test_resources_dir / "fivetran_golden.json"
+
+ with mock.patch(
+ "datahub.ingestion.source.fivetran.fivetran_log_api.create_engine"
+ ) as mock_create_engine:
+ connection_magic_mock = MagicMock()
+ connection_magic_mock.execute.side_effect = default_query_results
+
+ mock_create_engine.return_value = connection_magic_mock
+
+ pipeline = Pipeline.create(
+ {
+ "run_id": "powerbi-test",
+ "source": {
+ "type": "fivetran",
+ "config": {
+ "fivetran_log_config": {
+ "destination_platform": "snowflake",
+ "destination_config": {
+ "account_id": "TESTID",
+ "warehouse": "TEST_WH",
+ "username": "test",
+ "password": "test@123",
+ "database": "TEST_DATABASE",
+ "role": "TESTROLE",
+ "log_schema": "TEST_SCHEMA",
+ },
+ },
+ "connector_patterns": {
+ "allow": [
+ "postgres",
+ ]
+ },
+ "sources_to_database": {
+ "calendar_elected": "postgres_db",
+ },
+ "sources_to_platform_instance": {
+ "calendar_elected": {
+ "env": "DEV",
+ }
+ },
+ },
+ },
+ "sink": {
+ "type": "file",
+ "config": {
+ "filename": f"{output_file}",
+ },
+ },
+ }
+ )
+
+ pipeline.run()
+ pipeline.raise_from_status()
+ golden_file = "fivetran_golden.json"
+
+ mce_helpers.check_golden_file(
+ pytestconfig,
+ output_path=f"{output_file}",
+ golden_path=f"{test_resources_dir}/{golden_file}",
+ )
+
+
+@freeze_time(FROZEN_TIME)
+def test_fivetran_snowflake_destination_config(pytestconfig, tmp_path):
+ snowflake_dest = DestinationConfig(
+ account_id="TESTID",
+ warehouse="TEST_WH",
+ username="test",
+ password="test@123",
+ database="TEST_DATABASE",
+ role="TESTROLE",
+ log_schema="TEST_SCHEMA",
+ )
+ assert (
+ snowflake_dest.get_sql_alchemy_url()
+ == "snowflake://test:test%40123@TESTID?application=acryl_datahub&authenticator=SNOWFLAKE&role=TESTROLE&warehouse=TEST_WH"
+ )
diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json
index 3d956c5774ded..3c70eda8561b8 100644
--- a/metadata-service/war/src/main/resources/boot/data_platforms.json
+++ b/metadata-service/war/src/main/resources/boot/data_platforms.json
@@ -564,5 +564,15 @@
"type": "KEY_VALUE_STORE",
"logoUrl": "/assets/platforms/dynamodblogo.png"
}
+ },
+ {
+ "urn": "urn:li:dataPlatform:fivetran",
+ "aspect": {
+ "datasetNameDelimiter": ".",
+ "name": "fivetran",
+ "displayName": "Fivetran",
+ "type": "OTHERS",
+ "logoUrl": "/assets/platforms/fivetranlogo.png"
+ }
}
]
From 399e032dfa2b4bf87b7b406e7b009e34e99a1003 Mon Sep 17 00:00:00 2001
From: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com>
Date: Wed, 8 Nov 2023 22:32:13 +0530
Subject: [PATCH 072/792] feat(neo4j): Allow datahub to connect to specific
neo4j database (#9179)
Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
---
docker/docker-compose.override.yml | 4 ++
.../docker-compose-m1.quickstart.yml | 54 +++++++++----------
.../quickstart/docker-compose.quickstart.yml | 54 +++++++++----------
.../src/main/resources/application.yml | 1 +
.../common/Neo4jGraphServiceFactory.java | 7 ++-
5 files changed, 65 insertions(+), 55 deletions(-)
diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml
index 225aa01fa4e4f..0907f47d70c3c 100644
--- a/docker/docker-compose.override.yml
+++ b/docker/docker-compose.override.yml
@@ -7,8 +7,12 @@ services:
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
+ datahub-upgrade:
+ environment:
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
mysql-setup:
container_name: mysql-setup
hostname: mysql-setup
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index c96baf37551b2..613718306abef 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -81,32 +81,32 @@ services:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
- - EBEAN_DATASOURCE_USERNAME=datahub
- - EBEAN_DATASOURCE_PASSWORD=datahub
+ - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_HOST=mysql:3306
+ - EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- - KAFKA_BOOTSTRAP_SERVER=broker:29092
- - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+ - EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_HOST=elasticsearch
- - ELASTICSEARCH_PORT=9200
- - ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- - NEO4J_HOST=http://neo4j:7474
- - NEO4J_URI=bolt://neo4j
- - NEO4J_USERNAME=neo4j
- - NEO4J_PASSWORD=datahub
- - JAVA_OPTS=-Xms1g -Xmx1g
- - GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- - GRAPH_SERVICE_IMPL=neo4j
+ - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ - ELASTICSEARCH_PORT=9200
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
+ - ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+ - GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
+ - JAVA_OPTS=-Xms1g -Xmx1g
+ - KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
+ - METADATA_SERVICE_AUTH_ENABLED=false
+ - NEO4J_HOST=http://neo4j:7474
+ - NEO4J_PASSWORD=datahub
+ - NEO4J_URI=bolt://neo4j
+ - NEO4J_USERNAME=neo4j
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- - METADATA_SERVICE_AUTH_ENABLED=false
healthcheck:
interval: 1s
retries: 3
@@ -134,23 +134,23 @@ services:
neo4j:
condition: service_healthy
environment:
- - EBEAN_DATASOURCE_USERNAME=datahub
- - EBEAN_DATASOURCE_PASSWORD=datahub
+ - BACKFILL_BROWSE_PATHS_V2=true
+ - DATAHUB_GMS_HOST=datahub-gms
+ - DATAHUB_GMS_PORT=8080
+ - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_HOST=mysql:3306
+ - EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- - KAFKA_BOOTSTRAP_SERVER=broker:29092
- - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+ - EBEAN_DATASOURCE_USERNAME=datahub
+ - ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
- ELASTICSEARCH_HOST=elasticsearch
- - ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- - ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
- - GRAPH_SERVICE_IMPL=elasticsearch
- - DATAHUB_GMS_HOST=datahub-gms
- - DATAHUB_GMS_PORT=8080
+ - ELASTICSEARCH_PORT=9200
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- - BACKFILL_BROWSE_PATHS_V2=true
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
+ - KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index 8a66521cbb522..30ccbae59be74 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -81,32 +81,32 @@ services:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
- - EBEAN_DATASOURCE_USERNAME=datahub
- - EBEAN_DATASOURCE_PASSWORD=datahub
+ - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_HOST=mysql:3306
+ - EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8&enabledTLSProtocols=TLSv1.2
- - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- - KAFKA_BOOTSTRAP_SERVER=broker:29092
- - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+ - EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_HOST=elasticsearch
- - ELASTICSEARCH_PORT=9200
- - ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- - NEO4J_HOST=http://neo4j:7474
- - NEO4J_URI=bolt://neo4j
- - NEO4J_USERNAME=neo4j
- - NEO4J_PASSWORD=datahub
- - JAVA_OPTS=-Xms1g -Xmx1g
- - GRAPH_SERVICE_DIFF_MODE_ENABLED=true
- - GRAPH_SERVICE_IMPL=neo4j
+ - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ - ELASTICSEARCH_PORT=9200
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
+ - ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+ - GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
+ - JAVA_OPTS=-Xms1g -Xmx1g
+ - KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
+ - METADATA_SERVICE_AUTH_ENABLED=false
+ - NEO4J_HOST=http://neo4j:7474
+ - NEO4J_PASSWORD=datahub
+ - NEO4J_URI=bolt://neo4j
+ - NEO4J_USERNAME=neo4j
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
- - METADATA_SERVICE_AUTH_ENABLED=false
healthcheck:
interval: 1s
retries: 3
@@ -134,23 +134,23 @@ services:
neo4j:
condition: service_healthy
environment:
- - EBEAN_DATASOURCE_USERNAME=datahub
- - EBEAN_DATASOURCE_PASSWORD=datahub
+ - BACKFILL_BROWSE_PATHS_V2=true
+ - DATAHUB_GMS_HOST=datahub-gms
+ - DATAHUB_GMS_PORT=8080
+ - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- EBEAN_DATASOURCE_HOST=mysql:3306
+ - EBEAN_DATASOURCE_PASSWORD=datahub
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
- - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
- - KAFKA_BOOTSTRAP_SERVER=broker:29092
- - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+ - EBEAN_DATASOURCE_USERNAME=datahub
+ - ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
- ELASTICSEARCH_HOST=elasticsearch
- - ELASTICSEARCH_PORT=9200
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- - ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
- - GRAPH_SERVICE_IMPL=elasticsearch
- - DATAHUB_GMS_HOST=datahub-gms
- - DATAHUB_GMS_PORT=8080
+ - ELASTICSEARCH_PORT=9200
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- - BACKFILL_BROWSE_PATHS_V2=true
+ - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
+ - KAFKA_BOOTSTRAP_SERVER=broker:29092
+ - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
hostname: datahub-upgrade
image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index a06891699607b..46aa02d98572e 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -251,6 +251,7 @@ neo4j:
username: ${NEO4J_USERNAME:neo4j}
password: ${NEO4J_PASSWORD:datahub}
uri: ${NEO4J_URI:bolt://localhost}
+ database: ${NEO4J_DATABASE:graph.db}
maxConnectionPoolSize: ${NEO4J_MAX_CONNECTION_POOL_SIZE:100}
maxConnectionAcquisitionTimeout: ${NEO4J_MAX_CONNECTION_ACQUISITION_TIMEOUT_IN_SECONDS:60}
maxConnectionLifetimeInSeconds: ${NEO4j_MAX_CONNECTION_LIFETIME_IN_SECONDS:3600}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java
index e62dfd50f897d..87670ce10f481 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/Neo4jGraphServiceFactory.java
@@ -6,8 +6,10 @@
import com.linkedin.metadata.models.registry.EntityRegistry;
import javax.annotation.Nonnull;
import org.neo4j.driver.Driver;
+import org.neo4j.driver.SessionConfig;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
@@ -24,10 +26,13 @@ public class Neo4jGraphServiceFactory {
@Qualifier("entityRegistry")
private EntityRegistry entityRegistry;
+ @Value("${neo4j.database}")
+ private String neo4jDatabase;
+
@Bean(name = "neo4jGraphService")
@Nonnull
protected Neo4jGraphService getInstance() {
LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry);
- return new Neo4jGraphService(lineageRegistry, neo4jDriver);
+ return new Neo4jGraphService(lineageRegistry, neo4jDriver, SessionConfig.forDatabase(neo4jDatabase));
}
}
From 332d4afaab39e4b9e9ff73a48e3bfec9b21fe0b5 Mon Sep 17 00:00:00 2001
From: Gabe Lyons
Date: Wed, 8 Nov 2023 10:22:09 -0800
Subject: [PATCH 073/792] feat(subtypes): support subtypes for charts in the UI
(#9186)
---
.../java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java | 4 ++++
datahub-graphql-core/src/main/resources/entity.graphql | 5 +++++
datahub-web-react/src/app/entity/chart/ChartEntity.tsx | 4 ++++
.../src/app/entity/chart/preview/ChartPreview.tsx | 5 ++++-
datahub-web-react/src/graphql/chart.graphql | 3 +++
datahub-web-react/src/graphql/lineage.graphql | 3 +++
datahub-web-react/src/graphql/search.graphql | 6 ++++++
metadata-models/src/main/resources/entity-registry.yml | 1 +
8 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index b99f712034fe0..b0b26f073876c 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -1433,6 +1433,10 @@ private void configureChartResolvers(final RuntimeWiring.Builder builder) {
.dataFetcher("statsSummary", new ChartStatsSummaryResolver(this.timeseriesAspectService))
.dataFetcher("privileges", new EntityPrivilegesResolver(entityClient))
.dataFetcher("exists", new EntityExistsResolver(entityService))
+ .dataFetcher("subTypes", new SubTypesResolver(
+ this.entityClient,
+ "chart",
+ "subTypes"))
);
builder.type("ChartInfo", typeWiring -> typeWiring
.dataFetcher("inputs", new LoadableTypeBatchResolver<>(datasetType,
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index b37a8f34fa056..035f756a10d55 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -5249,6 +5249,11 @@ type Chart implements EntityWithRelationships & Entity & BrowsableEntity {
Whether or not this entity exists on DataHub
"""
exists: Boolean
+
+ """
+ Sub Types that this entity implements
+ """
+ subTypes: SubTypes
}
"""
diff --git a/datahub-web-react/src/app/entity/chart/ChartEntity.tsx b/datahub-web-react/src/app/entity/chart/ChartEntity.tsx
index 0f1b6dbf3d660..fc898dec9d93a 100644
--- a/datahub-web-react/src/app/entity/chart/ChartEntity.tsx
+++ b/datahub-web-react/src/app/entity/chart/ChartEntity.tsx
@@ -154,10 +154,12 @@ export class ChartEntity implements Entity {
getOverridePropertiesFromEntity = (chart?: Chart | null): GenericEntityProperties => {
// TODO: Get rid of this once we have correctly formed platform coming back.
const name = chart?.properties?.name;
+ const subTypes = chart?.subTypes;
const externalUrl = chart?.properties?.externalUrl;
return {
name,
externalUrl,
+ entityTypeOverride: subTypes ? capitalizeFirstLetterOnly(subTypes.typeNames?.[0]) : '',
};
};
@@ -187,6 +189,7 @@ export class ChartEntity implements Entity {
return (
{
type: EntityType.Chart,
icon: entity?.platform?.properties?.logoUrl || undefined,
platform: entity?.platform,
+ subtype: entity?.subTypes?.typeNames?.[0] || undefined,
};
};
diff --git a/datahub-web-react/src/app/entity/chart/preview/ChartPreview.tsx b/datahub-web-react/src/app/entity/chart/preview/ChartPreview.tsx
index 7d0fc143043e2..b7fbd63ee231e 100644
--- a/datahub-web-react/src/app/entity/chart/preview/ChartPreview.tsx
+++ b/datahub-web-react/src/app/entity/chart/preview/ChartPreview.tsx
@@ -15,6 +15,7 @@ import {
EntityPath,
} from '../../../../types.generated';
import DefaultPreviewCard from '../../../preview/DefaultPreviewCard';
+import { capitalizeFirstLetterOnly } from '../../../shared/textUtil';
import { useEntityRegistry } from '../../../useEntityRegistry';
import { IconStyleType } from '../../Entity';
import { ChartStatsSummary as ChartStatsSummaryView } from '../shared/ChartStatsSummary';
@@ -43,6 +44,7 @@ export const ChartPreview = ({
snippet,
degree,
paths,
+ subType,
}: {
urn: string;
platform?: string;
@@ -67,6 +69,7 @@ export const ChartPreview = ({
snippet?: React.ReactNode | null;
degree?: number;
paths?: EntityPath[];
+ subType?: string | null;
}): JSX.Element => {
const entityRegistry = useEntityRegistry();
@@ -76,7 +79,7 @@ export const ChartPreview = ({
name={name || ''}
urn={urn}
description={description || ''}
- type="Chart"
+ type={capitalizeFirstLetterOnly(subType) || 'Chart'}
typeIcon={entityRegistry.getIcon(EntityType.Chart, 14, IconStyleType.ACCENT)}
logoUrl={logoUrl || ''}
platform={platform}
diff --git a/datahub-web-react/src/graphql/chart.graphql b/datahub-web-react/src/graphql/chart.graphql
index d4d3c3c918408..a4b430720fa3d 100644
--- a/datahub-web-react/src/graphql/chart.graphql
+++ b/datahub-web-react/src/graphql/chart.graphql
@@ -100,6 +100,9 @@ query getChart($urn: String!) {
canEditLineage
canEditEmbed
}
+ subTypes {
+ typeNames
+ }
}
}
diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql
index 52385dee8631a..8fdfb696e0894 100644
--- a/datahub-web-react/src/graphql/lineage.graphql
+++ b/datahub-web-react/src/graphql/lineage.graphql
@@ -165,6 +165,9 @@ fragment lineageNodeProperties on EntityWithRelationships {
status {
removed
}
+ subTypes {
+ typeNames
+ }
}
... on Dataset {
name
diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql
index 2297c2d0c1d07..876be12fd335b 100644
--- a/datahub-web-react/src/graphql/search.graphql
+++ b/datahub-web-react/src/graphql/search.graphql
@@ -105,6 +105,9 @@ fragment autoCompleteFields on Entity {
parentContainers {
...parentContainersFields
}
+ subTypes {
+ typeNames
+ }
}
... on DataFlow {
orchestrator
@@ -550,6 +553,9 @@ fragment searchResultFields on Entity {
}
}
}
+ subTypes {
+ typeNames
+ }
}
... on DataFlow {
flowId
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index 11d0f74305d7b..a5296d074093b 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -120,6 +120,7 @@ entities:
- globalTags
- glossaryTerms
- browsePathsV2
+ - subTypes
- name: dashboard
keyAspect: dashboardKey
aspects:
From 72135914109a241aa11ceaeb68b9ac56134e7e64 Mon Sep 17 00:00:00 2001
From: Chris Collins
Date: Wed, 8 Nov 2023 14:36:33 -0500
Subject: [PATCH 074/792] feat(ui) Debounce auto-complete in search bar (#9205)
---
datahub-web-react/src/app/home/HomePageHeader.tsx | 6 ++++--
datahub-web-react/src/app/search/SearchablePage.tsx | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx
index 0052d54f562eb..c881109f6e419 100644
--- a/datahub-web-react/src/app/home/HomePageHeader.tsx
+++ b/datahub-web-react/src/app/home/HomePageHeader.tsx
@@ -1,6 +1,7 @@
import React, { useEffect, useMemo, useState } from 'react';
import { useHistory } from 'react-router';
import { Typography, Image, Row, Button, Tag } from 'antd';
+import { debounce } from 'lodash';
import styled, { useTheme } from 'styled-components/macro';
import { RightOutlined } from '@ant-design/icons';
import { ManageAccount } from '../shared/ManageAccount';
@@ -24,6 +25,7 @@ import { getAutoCompleteInputFromQuickFilter } from '../search/utils/filterUtils
import { useUserContext } from '../context/useUserContext';
import AcrylDemoBanner from './AcrylDemoBanner';
import DemoButton from '../entity/shared/components/styled/DemoButton';
+import { HALF_SECOND_IN_MS } from '../entity/shared/tabs/Dataset/Queries/utils/constants';
const Background = styled.div`
width: 100%;
@@ -176,7 +178,7 @@ export const HomePageHeader = () => {
});
};
- const onAutoComplete = (query: string) => {
+ const onAutoComplete = debounce((query: string) => {
if (query && query.trim() !== '') {
getAutoCompleteResultsForMultiple({
variables: {
@@ -189,7 +191,7 @@ export const HomePageHeader = () => {
},
});
}
- };
+ }, HALF_SECOND_IN_MS);
const onClickExploreAll = () => {
analytics.event({
diff --git a/datahub-web-react/src/app/search/SearchablePage.tsx b/datahub-web-react/src/app/search/SearchablePage.tsx
index 489687050c749..9d02d85d3634c 100644
--- a/datahub-web-react/src/app/search/SearchablePage.tsx
+++ b/datahub-web-react/src/app/search/SearchablePage.tsx
@@ -1,5 +1,6 @@
import React, { useEffect, useState } from 'react';
import { useHistory, useLocation } from 'react-router';
+import { debounce } from 'lodash';
import * as QueryString from 'query-string';
import { useTheme } from 'styled-components';
import { SearchHeader } from './SearchHeader';
@@ -17,6 +18,7 @@ import { getAutoCompleteInputFromQuickFilter } from './utils/filterUtils';
import { useQuickFiltersContext } from '../../providers/QuickFiltersContext';
import { useUserContext } from '../context/useUserContext';
import { useSelectedSortOption } from './context/SearchContext';
+import { HALF_SECOND_IN_MS } from '../entity/shared/tabs/Dataset/Queries/utils/constants';
const styles = {
children: {
@@ -93,7 +95,7 @@ export const SearchablePage = ({ onSearch, onAutoComplete, children }: Props) =>
});
};
- const autoComplete = (query: string) => {
+ const autoComplete = debounce((query: string) => {
if (query && query.trim() !== '') {
getAutoCompleteResults({
variables: {
@@ -105,7 +107,7 @@ export const SearchablePage = ({ onSearch, onAutoComplete, children }: Props) =>
},
});
}
- };
+ }, HALF_SECOND_IN_MS);
// Load correct autocomplete results on initial page load.
useEffect(() => {
From 70692b44e995eab252a2344496141acdf6181908 Mon Sep 17 00:00:00 2001
From: Gabe Lyons
Date: Wed, 8 Nov 2023 12:49:23 -0800
Subject: [PATCH 075/792] fix(lineage): magical lineage layout fix (#9187)
---
.../src/app/lineage/utils/layoutTree.ts | 21 +++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/datahub-web-react/src/app/lineage/utils/layoutTree.ts b/datahub-web-react/src/app/lineage/utils/layoutTree.ts
index cc704007049c2..a972a62308f07 100644
--- a/datahub-web-react/src/app/lineage/utils/layoutTree.ts
+++ b/datahub-web-react/src/app/lineage/utils/layoutTree.ts
@@ -32,6 +32,21 @@ function getParentRelationship(direction: Direction, parent: VizNode | null, nod
return directionRelationships?.find((r) => r?.entity?.urn === node?.urn);
}
+// Returns the index at which each distinct value in arr first appears. This helps make sure layouts that contain many references to the same URN don't struggle laying out that URN.
+function firstAppearanceIndices(arr) {
+ const seen = new Set(); // To track which strings have been seen
+ const result = [] as number[];
+
+ for (let i = 0; i < arr.length; i++) {
+ if (!seen.has(arr[i])) {
+ seen.add(arr[i]); // Add the string to the set
+ result.push(i); // Save the index
+ }
+ }
+
+ return result;
+}
+
function layoutNodesForOneDirection(
data: NodeData,
direction: Direction,
@@ -54,12 +69,10 @@ function layoutNodesForOneDirection(
while (nodesInCurrentLayer.length > 0) {
// if we've already added a node to the viz higher up dont add it again
const urnsToAddInCurrentLayer = Array.from(new Set(nodesInCurrentLayer.map(({ node }) => node.urn || '')));
- const nodesToAddInCurrentLayer = urnsToAddInCurrentLayer
- .filter((urn, pos) => urnsToAddInCurrentLayer.indexOf(urn) === pos)
- .filter((urn) => !nodesByUrn[urn || '']);
+ const positionsToAddInCurrentLayer = firstAppearanceIndices(urnsToAddInCurrentLayer);
const filteredNodesInCurrentLayer = nodesInCurrentLayer
- .filter(({ node }) => nodesToAddInCurrentLayer.indexOf(node.urn || '') > -1)
+ .filter((_, idx) => positionsToAddInCurrentLayer.indexOf(idx) > -1)
.filter(({ node }) => node.status?.removed !== true);
const layerSize = filteredNodesInCurrentLayer.length;
From f87983d69dc62db5c58dc114f8796dcb9eb1cc95 Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Wed, 8 Nov 2023 13:29:37 -0800
Subject: [PATCH 076/792] refactor(pdl): Refactoring Assertion model enums out
(#9191)
Co-authored-by: Harshal Sheth
---
.../linkedin/assertion/AssertionResult.pdl | 19 +--------------
.../assertion/AssertionResultType.pdl | 23 +++++++++++++++++++
.../linkedin/assertion/AssertionRunEvent.pdl | 7 +-----
.../linkedin/assertion/AssertionRunStatus.pdl | 12 ++++++++++
4 files changed, 37 insertions(+), 24 deletions(-)
create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultType.pdl
create mode 100644 metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunStatus.pdl
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl
index ded84e1969153..935f3e5976dfa 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResult.pdl
@@ -9,24 +9,7 @@ record AssertionResult {
*/
@TimeseriesField = {}
@Searchable = {}
- type: enum AssertionResultType {
- /**
- * The Assertion has not yet been fully evaluated
- */
- INIT
- /**
- * The Assertion Succeeded
- */
- SUCCESS
- /**
- * The Assertion Failed
- */
- FAILURE
- /**
- * The Assertion encountered an Error
- */
- ERROR
- }
+ type: AssertionResultType
/**
* Number of rows for evaluated batch
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultType.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultType.pdl
new file mode 100644
index 0000000000000..8954d94cced7b
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionResultType.pdl
@@ -0,0 +1,23 @@
+namespace com.linkedin.assertion
+
+/**
+* The final result of evaluating an assertion, e.g. SUCCESS, FAILURE, or ERROR.
+*/
+enum AssertionResultType {
+ /**
+ * The Assertion has not yet been fully evaluated
+ */
+ INIT
+ /**
+ * The Assertion Succeeded
+ */
+ SUCCESS
+ /**
+ * The Assertion Failed
+ */
+ FAILURE
+ /**
+ * The Assertion encountered an Error
+ */
+ ERROR
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl
index 14f1204232740..55bcae77273db 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunEvent.pdl
@@ -39,12 +39,7 @@ record AssertionRunEvent {
* The status of the assertion run as per this timeseries event.
*/
@TimeseriesField = {}
- status: enum AssertionRunStatus {
- /**
- * The Assertion Run has completed
- */
- COMPLETE
- }
+ status: AssertionRunStatus
/**
* Results of assertion, present if the status is COMPLETE
diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunStatus.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunStatus.pdl
new file mode 100644
index 0000000000000..e4e17925ede82
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionRunStatus.pdl
@@ -0,0 +1,12 @@
+namespace com.linkedin.assertion
+
+
+/**
+* The lifecycle status of an assertion run.
+*/
+enum AssertionRunStatus {
+ /**
+ * The Assertion Run has completed
+ */
+ COMPLETE
+}
\ No newline at end of file
From f38c8087bb508a779d94d04967a9c449f6d93126 Mon Sep 17 00:00:00 2001
From: Pedro Silva
Date: Wed, 8 Nov 2023 22:38:15 +0000
Subject: [PATCH 077/792] feat(auth): Add roles to policy engine validation
logic (#9178)
---
.../authorization/AuthorizedActors.java | 1 +
.../authorization/AuthorizerChain.java | 5 +
.../authorization/DataHubAuthorizer.java | 8 +-
.../datahub/authorization/PolicyEngine.java | 43 +++-----
.../authorization/DataHubAuthorizerTest.java | 97 ++++++++++++++++---
.../authorization/PolicyEngineTest.java | 54 ++++++++++-
.../datahub/plugins/test/TestAuthorizer.java | 2 +-
7 files changed, 162 insertions(+), 48 deletions(-)
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizedActors.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizedActors.java
index aec99e1b1e57a..5a9990552bb34 100644
--- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizedActors.java
+++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizedActors.java
@@ -15,6 +15,7 @@ public class AuthorizedActors {
String privilege;
List users;
List groups;
+ List roles;
boolean allUsers;
boolean allGroups;
}
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java
index f8eca541e1efb..7e7a1de176f06 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java
@@ -126,11 +126,16 @@ private AuthorizedActors mergeAuthorizedActors(@Nullable AuthorizedActors origin
mergedGroups = new ArrayList<>(groups);
}
+ Set roles = new HashSet<>(original.getRoles());
+ roles.addAll(other.getRoles());
+ List mergedRoles = new ArrayList<>(roles);
+
return AuthorizedActors.builder()
.allUsers(original.isAllUsers() || other.isAllUsers())
.allGroups(original.isAllGroups() || other.isAllGroups())
.users(mergedUsers)
.groups(mergedGroups)
+ .roles(mergedRoles)
.build();
}
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
index f8f99475de23e..956d635c7901a 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java
@@ -133,6 +133,7 @@ public AuthorizedActors authorizedActors(
final List authorizedUsers = new ArrayList<>();
final List authorizedGroups = new ArrayList<>();
+ final List authorizedRoles = new ArrayList<>();
boolean allUsers = false;
boolean allGroups = false;
@@ -153,16 +154,17 @@ public AuthorizedActors authorizedActors(
// Step 3: For each matching policy, add actors that are authorized.
authorizedUsers.addAll(matchingActors.getUsers());
authorizedGroups.addAll(matchingActors.getGroups());
- if (matchingActors.allUsers()) {
+ authorizedRoles.addAll(matchingActors.getRoles());
+ if (matchingActors.getAllUsers()) {
allUsers = true;
}
- if (matchingActors.allGroups()) {
+ if (matchingActors.getAllGroups()) {
allGroups = true;
}
}
// Step 4: Return all authorized users and groups.
- return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, allUsers, allGroups);
+ return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, authorizedRoles, allUsers, allGroups);
}
/**
diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java
index f8c017ea74e1f..da0ae26f2b1da 100644
--- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java
+++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/PolicyEngine.java
@@ -32,7 +32,10 @@
import java.util.stream.Stream;
import javax.annotation.Nullable;
+import lombok.AccessLevel;
+import lombok.AllArgsConstructor;
import lombok.RequiredArgsConstructor;
+import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import static com.linkedin.metadata.Constants.*;
@@ -75,6 +78,7 @@ public PolicyActors getMatchingActors(
final Optional resource) {
final List users = new ArrayList<>();
final List groups = new ArrayList<>();
+ final List roles = new ArrayList<>();
boolean allUsers = false;
boolean allGroups = false;
if (policyMatchesResource(policy, resource)) {
@@ -96,6 +100,9 @@ public PolicyActors getMatchingActors(
if (actorFilter.getGroups() != null) {
groups.addAll(actorFilter.getGroups());
}
+ if (actorFilter.getRoles() != null) {
+ roles.addAll(actorFilter.getRoles());
+ }
// 2. Fetch Actors based on resource ownership.
if (actorFilter.isResourceOwners() && resource.isPresent()) {
@@ -104,7 +111,7 @@ public PolicyActors getMatchingActors(
groups.addAll(groupOwners(owners));
}
}
- return new PolicyActors(users, groups, allUsers, allGroups);
+ return new PolicyActors(users, groups, roles, allUsers, allGroups);
}
private boolean isPolicyApplicable(
@@ -438,34 +445,14 @@ public boolean isGranted() {
/**
* Class used to represent all valid users of a policy.
*/
+ @Value
+ @AllArgsConstructor(access = AccessLevel.PUBLIC)
public static class PolicyActors {
- final List _users;
- final List _groups;
- final Boolean _allUsers;
- final Boolean _allGroups;
-
- public PolicyActors(final List users, final List groups, final Boolean allUsers, final Boolean allGroups) {
- _users = users;
- _groups = groups;
- _allUsers = allUsers;
- _allGroups = allGroups;
- }
-
- public List getUsers() {
- return _users;
- }
-
- public List getGroups() {
- return _groups;
- }
-
- public Boolean allUsers() {
- return _allUsers;
- }
-
- public Boolean allGroups() {
- return _allGroups;
- }
+ List users;
+ List groups;
+ List roles;
+ Boolean allUsers;
+ Boolean allGroups;
}
private List userOwners(final Set owners) {
diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
index babb1c5d00ee8..b0b206001209c 100644
--- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
+++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java
@@ -21,6 +21,7 @@
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.entity.EnvelopedAspectMap;
import com.linkedin.entity.client.EntityClient;
+import com.linkedin.identity.RoleMembership;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchEntity;
@@ -55,6 +56,7 @@
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.assertFalse;
public class DataHubAuthorizerTest {
@@ -63,6 +65,7 @@ public class DataHubAuthorizerTest {
private static final Urn PARENT_DOMAIN_URN = UrnUtils.getUrn("urn:li:domain:parent");
private static final Urn CHILD_DOMAIN_URN = UrnUtils.getUrn("urn:li:domain:child");
+ private static final Urn USER_WITH_ADMIN_ROLE = UrnUtils.getUrn("urn:li:corpuser:user-with-admin");
private EntityClient _entityClient;
private DataHubAuthorizer _dataHubAuthorizer;
@@ -92,40 +95,56 @@ public void setupTest() throws Exception {
final EnvelopedAspectMap childDomainPolicyAspectMap = new EnvelopedAspectMap();
childDomainPolicyAspectMap.put(DATAHUB_POLICY_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(childDomainPolicy.data())));
+ final Urn adminPolicyUrn = Urn.createFromString("urn:li:dataHubPolicy:4");
+ final DataHubActorFilter actorFilter = new DataHubActorFilter();
+ actorFilter.setRoles(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataHubRole:Admin"))));
+ final DataHubPolicyInfo adminPolicy = createDataHubPolicyInfoFor(true, ImmutableList.of("EDIT_USER_PROFILE"), null, actorFilter);
+ final EnvelopedAspectMap adminPolicyAspectMap = new EnvelopedAspectMap();
+ adminPolicyAspectMap.put(DATAHUB_POLICY_INFO_ASPECT_NAME, new EnvelopedAspect().setValue(new Aspect(adminPolicy.data())));
+
final ScrollResult policySearchResult1 = new ScrollResult()
.setScrollId("1")
- .setNumEntities(4)
+ .setNumEntities(5)
.setEntities(
new SearchEntityArray(
ImmutableList.of(new SearchEntity().setEntity(activePolicyUrn))));
final ScrollResult policySearchResult2 = new ScrollResult()
.setScrollId("2")
- .setNumEntities(4)
+ .setNumEntities(5)
.setEntities(
new SearchEntityArray(
ImmutableList.of(new SearchEntity().setEntity(inactivePolicyUrn))));
final ScrollResult policySearchResult3 = new ScrollResult()
.setScrollId("3")
- .setNumEntities(4)
+ .setNumEntities(5)
.setEntities(
new SearchEntityArray(
ImmutableList.of(new SearchEntity().setEntity(parentDomainPolicyUrn))));
final ScrollResult policySearchResult4 = new ScrollResult()
- .setNumEntities(4)
+ .setScrollId("4")
+ .setNumEntities(5)
.setEntities(
new SearchEntityArray(
ImmutableList.of(
new SearchEntity().setEntity(childDomainPolicyUrn))));
+ final ScrollResult policySearchResult5 = new ScrollResult()
+ .setNumEntities(5)
+ .setEntities(
+ new SearchEntityArray(
+ ImmutableList.of(
+ new SearchEntity().setEntity(adminPolicyUrn))));
+
when(_entityClient.scrollAcrossEntities(eq(List.of("dataHubPolicy")), eq(""), isNull(), any(), isNull(),
anyInt(), eq(new SearchFlags().setFulltext(true).setSkipAggregates(true).setSkipHighlighting(true).setSkipCache(true)), any()))
.thenReturn(policySearchResult1)
.thenReturn(policySearchResult2)
.thenReturn(policySearchResult3)
- .thenReturn(policySearchResult4);
+ .thenReturn(policySearchResult4)
+ .thenReturn(policySearchResult5);
when(_entityClient.batchGetV2(eq(POLICY_ENTITY_NAME), any(), eq(null), any())).thenAnswer(args -> {
Set inputUrns = args.getArgument(1);
@@ -140,6 +159,8 @@ public void setupTest() throws Exception {
return Map.of(parentDomainPolicyUrn, new EntityResponse().setUrn(parentDomainPolicyUrn).setAspects(parentDomainPolicyAspectMap));
case "urn:li:dataHubPolicy:3":
return Map.of(childDomainPolicyUrn, new EntityResponse().setUrn(childDomainPolicyUrn).setAspects(childDomainPolicyAspectMap));
+ case "urn:li:dataHubPolicy:4":
+ return Map.of(adminPolicyUrn, new EntityResponse().setUrn(adminPolicyUrn).setAspects(adminPolicyAspectMap));
default:
throw new IllegalStateException();
}
@@ -167,6 +188,10 @@ public void setupTest() throws Exception {
when(_entityClient.batchGetV2(any(), eq(Collections.singleton(PARENT_DOMAIN_URN)), eq(Collections.singleton(DOMAIN_PROPERTIES_ASPECT_NAME)), any()))
.thenReturn(createDomainPropertiesBatchResponse(null));
+ // Mock the role membership lookup so that USER_WITH_ADMIN_ROLE resolves to the Admin role
+ when(_entityClient.batchGetV2(any(), eq(Collections.singleton(USER_WITH_ADMIN_ROLE)), eq(Collections.singleton(ROLE_MEMBERSHIP_ASPECT_NAME)), any())
+ ).thenReturn(createUserRoleMembershipBatchResponse(USER_WITH_ADMIN_ROLE, UrnUtils.getUrn("urn:li:dataHubRole:Admin")));
+
final Authentication systemAuthentication = new Authentication(
new Actor(ActorType.USER, DATAHUB_SYSTEM_CLIENT_ID),
""
@@ -302,6 +327,32 @@ public void testAuthorizedActorsActivePolicy() throws Exception {
));
}
+ @Test
+ public void testAuthorizedRoleActivePolicy() throws Exception {
+ final AuthorizedActors actors =
+ _dataHubAuthorizer.authorizedActors("EDIT_USER_PROFILE", // Privilege granted by the role-based admin policy.
+ Optional.of(new EntitySpec("dataset", "urn:li:dataset:1")));
+
+ assertFalse(actors.isAllUsers());
+ assertFalse(actors.isAllGroups());
+ assertEquals(new HashSet<>(actors.getUsers()), ImmutableSet.of());
+ assertEquals(new HashSet<>(actors.getGroups()), ImmutableSet.of());
+ assertEquals(new HashSet<>(actors.getRoles()), ImmutableSet.of(UrnUtils.getUrn("urn:li:dataHubRole:Admin")));
+ }
+
+ @Test
+ public void testAuthorizationBasedOnRoleIsAllowed() {
+ EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test");
+
+ AuthorizationRequest request = new AuthorizationRequest(
+ USER_WITH_ADMIN_ROLE.toString(),
+ "EDIT_USER_PROFILE",
+ Optional.of(resourceSpec)
+ );
+
+ assertEquals(_dataHubAuthorizer.authorize(request).getType(), AuthorizationResult.Type.ALLOW);
+ }
+
@Test
public void testAuthorizationOnDomainWithPrivilegeIsAllowed() {
EntitySpec resourceSpec = new EntitySpec("dataset", "urn:li:dataset:test");
@@ -342,13 +393,6 @@ public void testAuthorizationOnDomainWithoutPrivilegeIsDenied() {
}
private DataHubPolicyInfo createDataHubPolicyInfo(boolean active, List privileges, @Nullable final Urn domain) throws Exception {
- final DataHubPolicyInfo dataHubPolicyInfo = new DataHubPolicyInfo();
- dataHubPolicyInfo.setType(METADATA_POLICY_TYPE);
- dataHubPolicyInfo.setState(active ? ACTIVE_POLICY_STATE : INACTIVE_POLICY_STATE);
- dataHubPolicyInfo.setPrivileges(new StringArray(privileges));
- dataHubPolicyInfo.setDisplayName("My Test Display");
- dataHubPolicyInfo.setDescription("My test display!");
- dataHubPolicyInfo.setEditable(true);
List users = ImmutableList.of(Urn.createFromString("urn:li:corpuser:user1"), Urn.createFromString("urn:li:corpuser:user2"));
List groups = ImmutableList.of(Urn.createFromString("urn:li:corpGroup:group1"), Urn.createFromString("urn:li:corpGroup:group2"));
@@ -359,6 +403,20 @@ private DataHubPolicyInfo createDataHubPolicyInfo(boolean active, List p
actorFilter.setAllGroups(true);
actorFilter.setUsers(new UrnArray(users));
actorFilter.setGroups(new UrnArray(groups));
+
+ return createDataHubPolicyInfoFor(active, privileges, domain, actorFilter);
+ }
+
+ private DataHubPolicyInfo createDataHubPolicyInfoFor(boolean active, List privileges,
+ @Nullable final Urn domain, DataHubActorFilter actorFilter) throws Exception {
+ final DataHubPolicyInfo dataHubPolicyInfo = new DataHubPolicyInfo();
+ dataHubPolicyInfo.setType(METADATA_POLICY_TYPE);
+ dataHubPolicyInfo.setState(active ? ACTIVE_POLICY_STATE : INACTIVE_POLICY_STATE);
+ dataHubPolicyInfo.setPrivileges(new StringArray(privileges));
+ dataHubPolicyInfo.setDisplayName("My Test Display");
+ dataHubPolicyInfo.setDescription("My test display!");
+ dataHubPolicyInfo.setEditable(true);
+
dataHubPolicyInfo.setActors(actorFilter);
final DataHubResourceFilter resourceFilter = new DataHubResourceFilter();
@@ -429,6 +487,21 @@ private Map createDomainPropertiesBatchResponse(@Nullable f
return batchResponse;
}
+ private Map createUserRoleMembershipBatchResponse(final Urn userUrn, @Nullable final Urn roleUrn) {
+ final Map batchResponse = new HashMap<>();
+ final EntityResponse response = new EntityResponse();
+ EnvelopedAspectMap aspectMap = new EnvelopedAspectMap();
+ final RoleMembership membership = new RoleMembership();
+ if (roleUrn != null) {
+ membership.setRoles(new UrnArray(roleUrn));
+ }
+ aspectMap.put(ROLE_MEMBERSHIP_ASPECT_NAME, new EnvelopedAspect()
+ .setValue(new com.linkedin.entity.Aspect(membership.data())));
+ response.setAspects(aspectMap);
+ batchResponse.put(userUrn, response);
+ return batchResponse;
+ }
+
private AuthorizerContext createAuthorizerContext(final Authentication systemAuthentication, final EntityClient entityClient) {
return new AuthorizerContext(Collections.emptyMap(), new DefaultEntitySpecResolver(systemAuthentication, entityClient));
}
diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java
index be8c948f8ef89..2790c16ba75e6 100644
--- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java
+++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/PolicyEngineTest.java
@@ -1041,6 +1041,7 @@ public void testGetMatchingActorsResourceMatch() throws Exception {
Urn.createFromString("urn:li:corpuser:user2"))));
actorFilter.setGroups(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:corpGroup:group1"),
Urn.createFromString("urn:li:corpGroup:group2"))));
+ actorFilter.setRoles(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:role:Admin"))));
dataHubPolicyInfo.setActors(actorFilter);
final DataHubResourceFilter resourceFilter = new DataHubResourceFilter();
@@ -1056,8 +1057,8 @@ public void testGetMatchingActorsResourceMatch() throws Exception {
Collections.emptySet(), Collections.emptySet());
PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec));
- assertTrue(actors.allUsers());
- assertTrue(actors.allGroups());
+ assertTrue(actors.getAllUsers());
+ assertTrue(actors.getAllGroups());
assertEquals(actors.getUsers(),
ImmutableList.of(Urn.createFromString("urn:li:corpuser:user1"), Urn.createFromString("urn:li:corpuser:user2"),
@@ -1068,6 +1069,8 @@ public void testGetMatchingActorsResourceMatch() throws Exception {
Urn.createFromString("urn:li:corpGroup:group2"), Urn.createFromString(AUTHORIZED_GROUP) // Resource Owner
));
+ assertEquals(actors.getRoles(), ImmutableList.of(Urn.createFromString("urn:li:role:Admin")));
+
// Verify aspect client called, entity client not called.
verify(_entityClient, times(0)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)),
eq(null), any());
@@ -1106,15 +1109,58 @@ public void testGetMatchingActorsNoResourceMatch() throws Exception {
buildEntityResolvers("dataset", "urn:li:dataset:random"); // A resource not covered by the policy.
PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec));
- assertFalse(actors.allUsers());
- assertFalse(actors.allGroups());
+ assertFalse(actors.getAllUsers());
+ assertFalse(actors.getAllGroups());
assertEquals(actors.getUsers(), Collections.emptyList());
assertEquals(actors.getGroups(), Collections.emptyList());
+ //assertEquals(actors.getRoles(), Collections.emptyList());
// Verify no network calls
verify(_entityClient, times(0)).batchGetV2(any(), any(), any(), any());
}
+ @Test
+ public void testGetMatchingActorsByRoleResourceMatch() throws Exception {
+ final DataHubPolicyInfo dataHubPolicyInfo = new DataHubPolicyInfo();
+ dataHubPolicyInfo.setType(METADATA_POLICY_TYPE);
+ dataHubPolicyInfo.setState(ACTIVE_POLICY_STATE);
+ dataHubPolicyInfo.setPrivileges(new StringArray("EDIT_ENTITY_TAGS"));
+ dataHubPolicyInfo.setDisplayName("My Test Display");
+ dataHubPolicyInfo.setDescription("My test display!");
+ dataHubPolicyInfo.setEditable(true);
+
+ final DataHubActorFilter actorFilter = new DataHubActorFilter();
+ actorFilter.setResourceOwners(true);
+ actorFilter.setAllUsers(false);
+ actorFilter.setAllGroups(false);
+ actorFilter.setRoles(new UrnArray(ImmutableList.of(Urn.createFromString("urn:li:dataHubRole:Editor"))));
+ dataHubPolicyInfo.setActors(actorFilter);
+
+ final DataHubResourceFilter resourceFilter = new DataHubResourceFilter();
+ resourceFilter.setAllResources(false);
+ resourceFilter.setType("dataset");
+ StringArray resourceUrns = new StringArray();
+ resourceUrns.add(RESOURCE_URN);
+ resourceFilter.setResources(resourceUrns);
+ dataHubPolicyInfo.setResources(resourceFilter);
+
+ ResolvedEntitySpec resourceSpec = buildEntityResolvers("dataset", RESOURCE_URN, ImmutableSet.of(),
+ Collections.emptySet(), Collections.emptySet());
+
+ PolicyEngine.PolicyActors actors = _policyEngine.getMatchingActors(dataHubPolicyInfo, Optional.of(resourceSpec));
+
+ assertFalse(actors.getAllUsers());
+ assertFalse(actors.getAllGroups());
+
+ assertEquals(actors.getUsers(), ImmutableList.of());
+ assertEquals(actors.getGroups(), ImmutableList.of());
+ assertEquals(actors.getRoles(), ImmutableList.of(Urn.createFromString("urn:li:dataHubRole:Editor")));
+
+ // Verify the entity client was not called to fetch the authorized user's corp user entity.
+ verify(_entityClient, times(0)).batchGetV2(eq(CORP_USER_ENTITY_NAME), eq(Collections.singleton(authorizedUserUrn)),
+ eq(null), any());
+ }
+
private Ownership createOwnershipAspect(final Boolean addUserOwner, final Boolean addGroupOwner) throws Exception {
final Ownership ownershipAspect = new Ownership();
final OwnerArray owners = new OwnerArray();
diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java
index 442ac1b0d287b..e5f3e223ff505 100644
--- a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java
+++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java
@@ -75,7 +75,7 @@ public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) {
@Override
public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) {
- return new AuthorizedActors("ALL", null, null, true, true);
+ return new AuthorizedActors("ALL", null, null, null, true, true);
}
}
From f73ecfdcbbc35437fcb80c9e27e78908dae23ea7 Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz
Date: Wed, 8 Nov 2023 18:17:49 -0500
Subject: [PATCH 078/792] style(ingest/tableau): Rename tableau_constant to c
(#9207)
---
.../src/datahub/ingestion/source/tableau.py | 597 ++++++++----------
.../ingestion/source/tableau_common.py | 14 +-
2 files changed, 272 insertions(+), 339 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
index 4bc40b0aac964..08df7599510f4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
@@ -59,7 +59,7 @@
)
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source
from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source import tableau_constant
+from datahub.ingestion.source import tableau_constant as c
from datahub.ingestion.source.common.subtypes import (
BIContainerSubTypes,
DatasetSubTypes,
@@ -720,16 +720,12 @@ def get_connection_object_page(
query, connection_type, query_filter, count, offset, False
)
- if tableau_constant.ERRORS in query_data:
- errors = query_data[tableau_constant.ERRORS]
+ if c.ERRORS in query_data:
+ errors = query_data[c.ERRORS]
if all(
# The format of the error messages is highly unpredictable, so we have to
# be extra defensive with our parsing.
- error
- and (error.get(tableau_constant.EXTENSIONS) or {}).get(
- tableau_constant.SEVERITY
- )
- == tableau_constant.WARNING
+ error and (error.get(c.EXTENSIONS) or {}).get(c.SEVERITY) == c.WARNING
for error in errors
):
self.report.report_warning(key=connection_type, reason=f"{errors}")
@@ -737,14 +733,14 @@ def get_connection_object_page(
raise RuntimeError(f"Query {connection_type} error: {errors}")
connection_object = (
- query_data.get(tableau_constant.DATA).get(connection_type, {})
- if query_data.get(tableau_constant.DATA)
+ query_data.get(c.DATA).get(connection_type, {})
+ if query_data.get(c.DATA)
else {}
)
- total_count = connection_object.get(tableau_constant.TOTAL_COUNT, 0)
- has_next_page = connection_object.get(tableau_constant.PAGE_INFO, {}).get(
- tableau_constant.HAS_NEXT_PAGE, False
+ total_count = connection_object.get(c.TOTAL_COUNT, 0)
+ has_next_page = connection_object.get(c.PAGE_INFO, {}).get(
+ c.HAS_NEXT_PAGE, False
)
return connection_object, total_count, has_next_page
@@ -781,7 +777,7 @@ def get_connection_objects(
offset += count
- for obj in connection_objects.get(tableau_constant.NODES) or []:
+ for obj in connection_objects.get(c.NODES) or []:
yield obj
def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
@@ -790,11 +786,11 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
project.name for project in self.tableau_project_registry.values()
]
project_names_str: str = json.dumps(project_names)
- projects = f"{tableau_constant.PROJECT_NAME_WITH_IN}: {project_names_str}"
+ projects = f"{c.PROJECT_NAME_WITH_IN}: {project_names_str}"
for workbook in self.get_connection_objects(
workbook_graphql_query,
- tableau_constant.WORKBOOKS_CONNECTION,
+ c.WORKBOOKS_CONNECTION,
projects,
page_size_override=self.config.workbook_page_size,
):
@@ -804,11 +800,9 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
# however Tableau supports projectLuidWithin in Tableau Cloud June 2022 / Server 2022.3 and later.
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
if project_luid not in self.tableau_project_registry.keys():
- wrk_name: Optional[str] = workbook.get(tableau_constant.NAME)
- wrk_id: Optional[str] = workbook.get(tableau_constant.ID)
- prj_name: Optional[str] = workbook.get(
- tableau_constant.PROJECT_NAME
- )
+ wrk_name: Optional[str] = workbook.get(c.NAME)
+ wrk_id: Optional[str] = workbook.get(c.ID)
+ prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)
logger.debug(
f"Skipping workbook {wrk_name}({wrk_id}) as it is project {prj_name}({project_luid}) not "
@@ -818,25 +812,22 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
yield from self.emit_workbook_as_container(workbook)
- for sheet in workbook.get(tableau_constant.SHEETS, []):
- self.sheet_ids.append(sheet[tableau_constant.ID])
+ for sheet in workbook.get(c.SHEETS, []):
+ self.sheet_ids.append(sheet[c.ID])
- for dashboard in workbook.get(tableau_constant.DASHBOARDS, []):
- self.dashboard_ids.append(dashboard[tableau_constant.ID])
+ for dashboard in workbook.get(c.DASHBOARDS, []):
+ self.dashboard_ids.append(dashboard[c.ID])
- for ds in workbook.get(tableau_constant.EMBEDDED_DATA_SOURCES, []):
- self.embedded_datasource_ids_being_used.append(
- ds[tableau_constant.ID]
- )
+ for ds in workbook.get(c.EMBEDDED_DATA_SOURCES, []):
+ self.embedded_datasource_ids_being_used.append(ds[c.ID])
def _track_custom_sql_ids(self, field: dict) -> None:
# Tableau shows custom sql datasource as a table in ColumnField's upstreamColumns.
- for column in field.get(tableau_constant.UPSTREAM_COLUMNS, []):
+ for column in field.get(c.UPSTREAM_COLUMNS, []):
table_id = (
- column.get(tableau_constant.TABLE, {}).get(tableau_constant.ID)
- if column.get(tableau_constant.TABLE)
- and column[tableau_constant.TABLE][tableau_constant.TYPE_NAME]
- == tableau_constant.CUSTOM_SQL_TABLE
+ column.get(c.TABLE, {}).get(c.ID)
+ if column.get(c.TABLE)
+ and column[c.TABLE][c.TYPE_NAME] == c.CUSTOM_SQL_TABLE
else None
)
@@ -861,15 +852,15 @@ def _create_upstream_table_lineage(
# and published datasource have same upstreamTables in this case.
if upstream_tables and is_embedded_ds:
logger.debug(
- f"Embedded datasource {datasource.get(tableau_constant.ID)} has upstreamDatasources.\
+ f"Embedded datasource {datasource.get(c.ID)} has upstreamDatasources.\
Setting only upstreamDatasources lineage. The upstreamTables lineage \
will be set via upstream published datasource."
)
else:
# This adds an edge to upstream DatabaseTables using `upstreamTables`
upstreams, id_to_urn = self.get_upstream_tables(
- datasource.get(tableau_constant.UPSTREAM_TABLES, []),
- datasource.get(tableau_constant.NAME),
+ datasource.get(c.UPSTREAM_TABLES, []),
+ datasource.get(c.NAME),
browse_path,
is_custom_sql=False,
)
@@ -878,23 +869,23 @@ def _create_upstream_table_lineage(
# This adds an edge to upstream CustomSQLTables using `fields`.`upstreamColumns`.`table`
csql_upstreams, csql_id_to_urn = self.get_upstream_csql_tables(
- datasource.get(tableau_constant.FIELDS) or [],
+ datasource.get(c.FIELDS) or [],
)
upstream_tables.extend(csql_upstreams)
table_id_to_urn.update(csql_id_to_urn)
logger.debug(
- f"A total of {len(upstream_tables)} upstream table edges found for datasource {datasource[tableau_constant.ID]}"
+ f"A total of {len(upstream_tables)} upstream table edges found for datasource {datasource[c.ID]}"
)
datasource_urn = builder.make_dataset_urn_with_platform_instance(
platform=self.platform,
- name=datasource[tableau_constant.ID],
+ name=datasource[c.ID],
platform_instance=self.config.platform_instance,
env=self.config.env,
)
- if datasource.get(tableau_constant.FIELDS):
+ if datasource.get(c.FIELDS):
if self.config.extract_column_level_lineage:
# Find fine grained lineage for datasource column to datasource column edge,
# upstream columns may be from same datasource
@@ -912,20 +903,20 @@ def _create_upstream_table_lineage(
fine_grained_lineages.extend(upstream_columns)
logger.debug(
- f"A total of {len(fine_grained_lineages)} upstream column edges found for datasource {datasource[tableau_constant.ID]}"
+ f"A total of {len(fine_grained_lineages)} upstream column edges found for datasource {datasource[c.ID]}"
)
return upstream_tables, fine_grained_lineages
def get_upstream_datasources(self, datasource: dict) -> List[Upstream]:
upstream_tables = []
- for ds in datasource.get(tableau_constant.UPSTREAM_DATA_SOURCES, []):
- if ds[tableau_constant.ID] not in self.datasource_ids_being_used:
- self.datasource_ids_being_used.append(ds[tableau_constant.ID])
+ for ds in datasource.get(c.UPSTREAM_DATA_SOURCES, []):
+ if ds[c.ID] not in self.datasource_ids_being_used:
+ self.datasource_ids_being_used.append(ds[c.ID])
upstream_ds_urn = builder.make_dataset_urn_with_platform_instance(
platform=self.platform,
- name=ds[tableau_constant.ID],
+ name=ds[c.ID],
platform_instance=self.config.platform_instance,
env=self.config.env,
)
@@ -943,20 +934,15 @@ def get_upstream_csql_tables(
csql_id_to_urn = {}
for field in fields:
- if not field.get(tableau_constant.UPSTREAM_COLUMNS):
+ if not field.get(c.UPSTREAM_COLUMNS):
continue
- for upstream_col in field[tableau_constant.UPSTREAM_COLUMNS]:
+ for upstream_col in field[c.UPSTREAM_COLUMNS]:
if (
upstream_col
- and upstream_col.get(tableau_constant.TABLE)
- and upstream_col.get(tableau_constant.TABLE)[
- tableau_constant.TYPE_NAME
- ]
- == tableau_constant.CUSTOM_SQL_TABLE
+ and upstream_col.get(c.TABLE)
+ and upstream_col.get(c.TABLE)[c.TYPE_NAME] == c.CUSTOM_SQL_TABLE
):
- upstream_table_id = upstream_col.get(tableau_constant.TABLE)[
- tableau_constant.ID
- ]
+ upstream_table_id = upstream_col.get(c.TABLE)[c.ID]
csql_urn = builder.make_dataset_urn_with_platform_instance(
platform=self.platform,
@@ -986,18 +972,18 @@ def get_upstream_tables(
for table in tables:
# skip upstream tables when there is no column info when retrieving datasource
# Lineage and Schema details for these will be taken care in self.emit_custom_sql_datasources()
- num_tbl_cols: Optional[int] = table.get(
- tableau_constant.COLUMNS_CONNECTION
- ) and table[tableau_constant.COLUMNS_CONNECTION].get("totalCount")
+ num_tbl_cols: Optional[int] = table.get(c.COLUMNS_CONNECTION) and table[
+ c.COLUMNS_CONNECTION
+ ].get("totalCount")
if not is_custom_sql and not num_tbl_cols:
logger.debug(
- f"Skipping upstream table with id {table[tableau_constant.ID]}, no columns: {table}"
+ f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
)
continue
- elif table[tableau_constant.NAME] is None:
+ elif table[c.NAME] is None:
self.report.num_upstream_table_skipped_no_name += 1
logger.warning(
- f"Skipping upstream table {table[tableau_constant.ID]} from lineage since its name is none: {table}"
+ f"Skipping upstream table {table[c.ID]} from lineage since its name is none: {table}"
)
continue
@@ -1014,7 +1000,7 @@ def get_upstream_tables(
self.config.platform_instance_map,
self.config.lineage_overrides,
)
- table_id_to_urn[table[tableau_constant.ID]] = table_urn
+ table_id_to_urn[table[c.ID]] = table_urn
upstream_table = Upstream(
dataset=table_urn,
@@ -1029,13 +1015,13 @@ def get_upstream_tables(
if table_urn not in self.database_tables:
self.database_tables[table_urn] = DatabaseTable(
urn=table_urn,
- id=table[tableau_constant.ID],
+ id=table[c.ID],
num_cols=num_tbl_cols,
paths={table_path} if table_path else set(),
)
else:
self.database_tables[table_urn].update_table(
- table[tableau_constant.ID], num_tbl_cols, table_path
+ table[c.ID], num_tbl_cols, table_path
)
return upstream_tables, table_id_to_urn
@@ -1047,24 +1033,24 @@ def get_upstream_columns_of_fields_in_datasource(
table_id_to_urn: Dict[str, str],
) -> List[FineGrainedLineage]:
fine_grained_lineages = []
- for field in datasource.get(tableau_constant.FIELDS) or []:
- field_name = field.get(tableau_constant.NAME)
+ for field in datasource.get(c.FIELDS) or []:
+ field_name = field.get(c.NAME)
# upstreamColumns lineage will be set via upstreamFields.
# such as for CalculatedField
if (
not field_name
- or not field.get(tableau_constant.UPSTREAM_COLUMNS)
- or field.get(tableau_constant.UPSTREAM_FIELDS)
+ or not field.get(c.UPSTREAM_COLUMNS)
+ or field.get(c.UPSTREAM_FIELDS)
):
continue
input_columns = []
- for upstream_col in field.get(tableau_constant.UPSTREAM_COLUMNS):
+ for upstream_col in field.get(c.UPSTREAM_COLUMNS):
if not upstream_col:
continue
- name = upstream_col.get(tableau_constant.NAME)
+ name = upstream_col.get(c.NAME)
upstream_table_id = (
- upstream_col.get(tableau_constant.TABLE)[tableau_constant.ID]
- if upstream_col.get(tableau_constant.TABLE)
+ upstream_col.get(c.TABLE)[c.ID]
+ if upstream_col.get(c.TABLE)
else None
)
if (
@@ -1110,23 +1096,21 @@ def get_upstream_fields_of_field_in_datasource(
self, datasource: dict, datasource_urn: str
) -> List[FineGrainedLineage]:
fine_grained_lineages = []
- for field in datasource.get(tableau_constant.FIELDS) or []:
- field_name = field.get(tableau_constant.NAME)
+ for field in datasource.get(c.FIELDS) or []:
+ field_name = field.get(c.NAME)
# It is observed that upstreamFields gives one-hop field
# lineage, and not multi-hop field lineage
# This behavior is as desired in our case.
- if not field_name or not field.get(tableau_constant.UPSTREAM_FIELDS):
+ if not field_name or not field.get(c.UPSTREAM_FIELDS):
continue
input_fields = []
- for upstream_field in field.get(tableau_constant.UPSTREAM_FIELDS):
+ for upstream_field in field.get(c.UPSTREAM_FIELDS):
if not upstream_field:
continue
- name = upstream_field.get(tableau_constant.NAME)
+ name = upstream_field.get(c.NAME)
upstream_ds_id = (
- upstream_field.get(tableau_constant.DATA_SOURCE)[
- tableau_constant.ID
- ]
- if upstream_field.get(tableau_constant.DATA_SOURCE)
+ upstream_field.get(c.DATA_SOURCE)[c.ID]
+ if upstream_field.get(c.DATA_SOURCE)
else None
)
if name and upstream_ds_id:
@@ -1212,35 +1196,37 @@ def get_upstream_fields_from_custom_sql(
return fine_grained_lineages
def get_transform_operation(self, field: dict) -> str:
- field_type = field[tableau_constant.TYPE_NAME]
+ field_type = field[c.TYPE_NAME]
if field_type in (
- tableau_constant.DATA_SOURCE_FIELD,
- tableau_constant.COLUMN_FIELD,
+ c.DATA_SOURCE_FIELD,
+ c.COLUMN_FIELD,
):
- op = tableau_constant.IDENTITY # How to specify exact same
- elif field_type == tableau_constant.CALCULATED_FIELD:
+ op = c.IDENTITY # How to specify exact same
+ elif field_type == c.CALCULATED_FIELD:
op = field_type
- if field.get(tableau_constant.FORMULA):
- op += f"formula: {field.get(tableau_constant.FORMULA)}"
+ if field.get(c.FORMULA):
+ op += f"formula: {field.get(c.FORMULA)}"
else:
op = field_type # BinField, CombinedField, etc
return op
def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
- custom_sql_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.custom_sql_ids_being_used)}"
+ custom_sql_filter = (
+ f"{c.ID_WITH_IN}: {json.dumps(self.custom_sql_ids_being_used)}"
+ )
custom_sql_connection = list(
self.get_connection_objects(
custom_sql_graphql_query,
- tableau_constant.CUSTOM_SQL_TABLE_CONNECTION,
+ c.CUSTOM_SQL_TABLE_CONNECTION,
custom_sql_filter,
)
)
unique_custom_sql = get_unique_custom_sql(custom_sql_connection)
for csql in unique_custom_sql:
- csql_id: str = csql[tableau_constant.ID]
+ csql_id: str = csql[c.ID]
csql_urn = builder.make_dataset_urn_with_platform_instance(
platform=self.platform,
name=csql_id,
@@ -1256,40 +1242,33 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
datasource_name = None
project = None
- if len(csql[tableau_constant.DATA_SOURCES]) > 0:
+ if len(csql[c.DATA_SOURCES]) > 0:
# CustomSQLTable id owned by exactly one tableau data source
logger.debug(
- f"Number of datasources referencing CustomSQLTable: {len(csql[tableau_constant.DATA_SOURCES])}"
+ f"Number of datasources referencing CustomSQLTable: {len(csql[c.DATA_SOURCES])}"
)
- datasource = csql[tableau_constant.DATA_SOURCES][0]
- datasource_name = datasource.get(tableau_constant.NAME)
+ datasource = csql[c.DATA_SOURCES][0]
+ datasource_name = datasource.get(c.NAME)
if datasource.get(
- tableau_constant.TYPE_NAME
- ) == tableau_constant.EMBEDDED_DATA_SOURCE and datasource.get(
- tableau_constant.WORKBOOK
- ):
+ c.TYPE_NAME
+ ) == c.EMBEDDED_DATA_SOURCE and datasource.get(c.WORKBOOK):
datasource_name = (
- f"{datasource.get(tableau_constant.WORKBOOK).get(tableau_constant.NAME)}/{datasource_name}"
- if datasource_name
- and datasource.get(tableau_constant.WORKBOOK).get(
- tableau_constant.NAME
- )
+ f"{datasource.get(c.WORKBOOK).get(c.NAME)}/{datasource_name}"
+ if datasource_name and datasource.get(c.WORKBOOK).get(c.NAME)
else None
)
logger.debug(
f"Adding datasource {datasource_name}({datasource.get('id')}) to container"
)
yield from add_entity_to_container(
- self.gen_workbook_key(
- datasource[tableau_constant.WORKBOOK][tableau_constant.ID]
- ),
- tableau_constant.DATASET,
+ self.gen_workbook_key(datasource[c.WORKBOOK][c.ID]),
+ c.DATASET,
dataset_snapshot.urn,
)
project = self._get_project_browse_path_name(datasource)
- tables = csql.get(tableau_constant.TABLES, [])
+ tables = csql.get(c.TABLES, [])
if tables:
# lineage from custom sql -> datasets/tables #
@@ -1306,9 +1285,8 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
# Schema Metadata
# if condition is needed as graphQL return "cloumns": None
columns: List[Dict[Any, Any]] = (
- cast(List[Dict[Any, Any]], csql.get(tableau_constant.COLUMNS))
- if tableau_constant.COLUMNS in csql
- and csql.get(tableau_constant.COLUMNS) is not None
+ cast(List[Dict[Any, Any]], csql.get(c.COLUMNS))
+ if c.COLUMNS in csql and csql.get(c.COLUMNS) is not None
else []
)
schema_metadata = self.get_schema_metadata_for_custom_sql(columns)
@@ -1320,7 +1298,7 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
if project and datasource_name:
browse_paths = BrowsePathsClass(
paths=[
- f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource[tableau_constant.NAME]}"
+ f"/{self.config.env.lower()}/{self.platform}/{project}/{datasource[c.NAME]}"
]
)
dataset_snapshot.aspects.append(browse_paths)
@@ -1328,27 +1306,25 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
logger.debug(f"Browse path not set for Custom SQL table {csql_id}")
dataset_properties = DatasetPropertiesClass(
- name=csql.get(tableau_constant.NAME),
- description=csql.get(tableau_constant.DESCRIPTION),
+ name=csql.get(c.NAME),
+ description=csql.get(c.DESCRIPTION),
)
dataset_snapshot.aspects.append(dataset_properties)
- if csql.get(tableau_constant.QUERY):
+ if csql.get(c.QUERY):
view_properties = ViewPropertiesClass(
materialized=False,
- viewLanguage=tableau_constant.SQL,
- viewLogic=clean_query(csql[tableau_constant.QUERY]),
+ viewLanguage=c.SQL,
+ viewLogic=clean_query(csql[c.QUERY]),
)
dataset_snapshot.aspects.append(view_properties)
yield self.get_metadata_change_event(dataset_snapshot)
yield self.get_metadata_change_proposal(
dataset_snapshot.urn,
- aspect_name=tableau_constant.SUB_TYPES,
- aspect=SubTypesClass(
- typeNames=[DatasetSubTypes.VIEW, tableau_constant.CUSTOM_SQL]
- ),
+ aspect_name=c.SUB_TYPES,
+ aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW, c.CUSTOM_SQL]),
)
def get_schema_metadata_for_custom_sql(
@@ -1359,21 +1335,19 @@ def get_schema_metadata_for_custom_sql(
for field in columns:
# Datasource fields
- if field.get(tableau_constant.NAME) is None:
+ if field.get(c.NAME) is None:
self.report.num_csql_field_skipped_no_name += 1
logger.warning(
- f"Skipping field {field[tableau_constant.ID]} from schema since its name is none"
+ f"Skipping field {field[c.ID]} from schema since its name is none"
)
continue
- nativeDataType = field.get(
- tableau_constant.REMOTE_TYPE, tableau_constant.UNKNOWN
- )
+ nativeDataType = field.get(c.REMOTE_TYPE, c.UNKNOWN)
TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
schema_field = SchemaField(
- fieldPath=field[tableau_constant.NAME],
+ fieldPath=field[c.NAME],
type=SchemaFieldDataType(type=TypeClass()),
nativeDataType=nativeDataType,
- description=field.get(tableau_constant.DESCRIPTION),
+ description=field.get(c.DESCRIPTION),
)
fields.append(schema_field)
@@ -1391,28 +1365,25 @@ def _get_published_datasource_project_luid(self, ds: dict) -> Optional[str]:
# This is fallback in case "get all datasources" query fails for some reason.
# It is possible due to https://github.com/tableau/server-client-python/issues/1210
if (
- ds.get(tableau_constant.LUID)
- and ds[tableau_constant.LUID] not in self.datasource_project_map.keys()
+ ds.get(c.LUID)
+ and ds[c.LUID] not in self.datasource_project_map.keys()
and self.report.get_all_datasources_query_failed
):
logger.debug(
- f"published datasource {ds.get(tableau_constant.NAME)} project_luid not found."
- f" Running get datasource query for {ds[tableau_constant.LUID]}"
+ f"published datasource {ds.get(c.NAME)} project_luid not found."
+ f" Running get datasource query for {ds[c.LUID]}"
)
# Query and update self.datasource_project_map with luid
- self._query_published_datasource_for_project_luid(ds[tableau_constant.LUID])
+ self._query_published_datasource_for_project_luid(ds[c.LUID])
if (
- ds.get(tableau_constant.LUID)
- and ds[tableau_constant.LUID] in self.datasource_project_map.keys()
- and self.datasource_project_map[ds[tableau_constant.LUID]]
- in self.tableau_project_registry
+ ds.get(c.LUID)
+ and ds[c.LUID] in self.datasource_project_map.keys()
+ and self.datasource_project_map[ds[c.LUID]] in self.tableau_project_registry
):
- return self.datasource_project_map[ds[tableau_constant.LUID]]
+ return self.datasource_project_map[ds[c.LUID]]
- logger.debug(
- f"published datasource {ds.get(tableau_constant.NAME)} project_luid not found"
- )
+ logger.debug(f"published datasource {ds.get(c.NAME)} project_luid not found")
return None
@@ -1437,60 +1408,52 @@ def _query_published_datasource_for_project_luid(self, ds_luid: str) -> None:
logger.debug("Error stack trace", exc_info=True)
def _get_workbook_project_luid(self, wb: dict) -> Optional[str]:
- if wb.get(tableau_constant.LUID) and self.workbook_project_map.get(
- wb[tableau_constant.LUID]
- ):
- return self.workbook_project_map[wb[tableau_constant.LUID]]
+ if wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]):
+ return self.workbook_project_map[wb[c.LUID]]
- logger.debug(f"workbook {wb.get(tableau_constant.NAME)} project_luid not found")
+ logger.debug(f"workbook {wb.get(c.NAME)} project_luid not found")
return None
def _get_embedded_datasource_project_luid(self, ds: dict) -> Optional[str]:
- if ds.get(tableau_constant.WORKBOOK):
+ if ds.get(c.WORKBOOK):
project_luid: Optional[str] = self._get_workbook_project_luid(
- ds[tableau_constant.WORKBOOK]
+ ds[c.WORKBOOK]
)
if project_luid and project_luid in self.tableau_project_registry:
return project_luid
- logger.debug(
- f"embedded datasource {ds.get(tableau_constant.NAME)} project_luid not found"
- )
+ logger.debug(f"embedded datasource {ds.get(c.NAME)} project_luid not found")
return None
def _get_datasource_project_luid(self, ds: dict) -> Optional[str]:
# Only published and embedded data-sources are supported
- ds_type: Optional[str] = ds.get(tableau_constant.TYPE_NAME)
+ ds_type: Optional[str] = ds.get(c.TYPE_NAME)
if ds_type not in (
- tableau_constant.PUBLISHED_DATA_SOURCE,
- tableau_constant.EMBEDDED_DATA_SOURCE,
+ c.PUBLISHED_DATA_SOURCE,
+ c.EMBEDDED_DATA_SOURCE,
):
logger.debug(
- f"datasource {ds.get(tableau_constant.NAME)} type {ds.get(tableau_constant.TYPE_NAME)} is "
+ f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is "
f"unsupported"
)
return None
func_selector: Any = {
- tableau_constant.PUBLISHED_DATA_SOURCE: self._get_published_datasource_project_luid,
- tableau_constant.EMBEDDED_DATA_SOURCE: self._get_embedded_datasource_project_luid,
+ c.PUBLISHED_DATA_SOURCE: self._get_published_datasource_project_luid,
+ c.EMBEDDED_DATA_SOURCE: self._get_embedded_datasource_project_luid,
}
return func_selector[ds_type](ds)
@staticmethod
def _get_datasource_project_name(ds: dict) -> Optional[str]:
- if ds.get(
- tableau_constant.TYPE_NAME
- ) == tableau_constant.EMBEDDED_DATA_SOURCE and ds.get(
- tableau_constant.WORKBOOK
- ):
- return ds[tableau_constant.WORKBOOK].get(tableau_constant.PROJECT_NAME)
- if ds.get(tableau_constant.TYPE_NAME) == tableau_constant.PUBLISHED_DATA_SOURCE:
- return ds.get(tableau_constant.PROJECT_NAME)
+ if ds.get(c.TYPE_NAME) == c.EMBEDDED_DATA_SOURCE and ds.get(c.WORKBOOK):
+ return ds[c.WORKBOOK].get(c.PROJECT_NAME)
+ if ds.get(c.TYPE_NAME) == c.PUBLISHED_DATA_SOURCE:
+ return ds.get(c.PROJECT_NAME)
return None
def _get_project_browse_path_name(self, ds: dict) -> Optional[str]:
@@ -1502,7 +1465,7 @@ def _get_project_browse_path_name(self, ds: dict) -> Optional[str]:
project_luid = self._get_datasource_project_luid(ds)
if project_luid is None:
logger.warning(
- f"Could not load project hierarchy for datasource {ds.get(tableau_constant.NAME)}. Please check permissions."
+ f"Could not load project hierarchy for datasource {ds.get(c.NAME)}. Please check permissions."
)
logger.debug(f"datasource = {ds}")
return None
@@ -1515,7 +1478,7 @@ def _create_lineage_to_upstream_tables(
# This adds an edge to upstream DatabaseTables using `upstreamTables`
upstream_tables, _ = self.get_upstream_tables(
tables,
- datasource.get(tableau_constant.NAME) or "",
+ datasource.get(c.NAME) or "",
self._get_project_browse_path_name(datasource),
is_custom_sql=True,
)
@@ -1524,7 +1487,7 @@ def _create_lineage_to_upstream_tables(
upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
yield self.get_metadata_change_proposal(
csql_urn,
- aspect_name=tableau_constant.UPSTREAM_LINEAGE,
+ aspect_name=c.UPSTREAM_LINEAGE,
aspect=upstream_lineage,
)
@@ -1547,22 +1510,19 @@ def parse_custom_sql(
]
],
) -> Optional["SqlParsingResult"]:
- database_info = datasource.get(tableau_constant.DATABASE) or {}
+ database_info = datasource.get(c.DATABASE) or {}
- if datasource.get(tableau_constant.IS_UNSUPPORTED_CUSTOM_SQL) in (None, False):
+ if datasource.get(c.IS_UNSUPPORTED_CUSTOM_SQL) in (None, False):
logger.debug(f"datasource {datasource_urn} is not created from custom sql")
return None
- if (
- tableau_constant.NAME not in database_info
- or tableau_constant.CONNECTION_TYPE not in database_info
- ):
+ if c.NAME not in database_info or c.CONNECTION_TYPE not in database_info:
logger.debug(
f"database information is missing from datasource {datasource_urn}"
)
return None
- query = datasource.get(tableau_constant.QUERY)
+ query = datasource.get(c.QUERY)
if query is None:
logger.debug(
f"raw sql query is not available for datasource {datasource_urn}"
@@ -1571,13 +1531,13 @@ def parse_custom_sql(
logger.debug(f"Parsing sql={query}")
- upstream_db = database_info.get(tableau_constant.NAME)
+ upstream_db = database_info.get(c.NAME)
if func_overridden_info is not None:
# Override the information as per configuration
upstream_db, platform_instance, platform, _ = func_overridden_info(
- database_info[tableau_constant.CONNECTION_TYPE],
- database_info.get(tableau_constant.NAME),
+ database_info[c.CONNECTION_TYPE],
+ database_info.get(c.NAME),
self.config.platform_instance_map,
self.config.lineage_overrides,
)
@@ -1631,7 +1591,7 @@ def _create_lineage_from_unsupported_csql(
yield self.get_metadata_change_proposal(
csql_urn,
- aspect_name=tableau_constant.UPSTREAM_LINEAGE,
+ aspect_name=c.UPSTREAM_LINEAGE,
aspect=upstream_lineage,
)
@@ -1642,10 +1602,10 @@ def _get_schema_metadata_for_datasource(
for field in datasource_fields:
# check datasource - custom sql relations from a field being referenced
self._track_custom_sql_ids(field)
- if field.get(tableau_constant.NAME) is None:
+ if field.get(c.NAME) is None:
self.report.num_upstream_table_skipped_no_name += 1
logger.warning(
- f"Skipping field {field[tableau_constant.ID]} from schema since its name is none"
+ f"Skipping field {field[c.ID]} from schema since its name is none"
)
continue
@@ -1678,7 +1638,7 @@ def get_metadata_change_proposal(
aspect: Union["UpstreamLineage", "SubTypesClass"],
) -> MetadataWorkUnit:
return MetadataChangeProposalWrapper(
- entityType=tableau_constant.DATASET,
+ entityType=c.DATASET,
changeType=ChangeTypeClass.UPSERT,
entityUrn=urn,
aspectName=aspect_name,
@@ -1696,10 +1656,8 @@ def emit_datasource(
datasource_info = datasource
browse_path = self._get_project_browse_path_name(datasource)
- logger.debug(
- f"datasource {datasource.get(tableau_constant.NAME)} browse-path {browse_path}"
- )
- datasource_id = datasource[tableau_constant.ID]
+ logger.debug(f"datasource {datasource.get(c.NAME)} browse-path {browse_path}")
+ datasource_id = datasource[c.ID]
datasource_urn = builder.make_dataset_urn_with_platform_instance(
self.platform, datasource_id, self.config.platform_instance, self.config.env
)
@@ -1713,13 +1671,10 @@ def emit_datasource(
# Browse path
- if (
- browse_path
- and is_embedded_ds
- and workbook
- and workbook.get(tableau_constant.NAME)
- ):
- browse_path = f"{browse_path}/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}"
+ if browse_path and is_embedded_ds and workbook and workbook.get(c.NAME):
+ browse_path = (
+ f"{browse_path}/{workbook[c.NAME].replace('/', REPLACE_SLASH_CHAR)}"
+ )
if browse_path:
browse_paths = BrowsePathsClass(
@@ -1729,12 +1684,10 @@ def emit_datasource(
# Ownership
owner = (
- self._get_ownership(
- datasource_info[tableau_constant.OWNER][tableau_constant.USERNAME]
- )
+ self._get_ownership(datasource_info[c.OWNER][c.USERNAME])
if datasource_info
- and datasource_info.get(tableau_constant.OWNER)
- and datasource_info[tableau_constant.OWNER].get(tableau_constant.USERNAME)
+ and datasource_info.get(c.OWNER)
+ and datasource_info[c.OWNER].get(c.USERNAME)
else None
)
if owner is not None:
@@ -1742,24 +1695,22 @@ def emit_datasource(
# Dataset properties
dataset_props = DatasetPropertiesClass(
- name=datasource.get(tableau_constant.NAME),
- description=datasource.get(tableau_constant.DESCRIPTION),
+ name=datasource.get(c.NAME),
+ description=datasource.get(c.DESCRIPTION),
customProperties=self.get_custom_props_from_dict(
datasource,
[
- tableau_constant.HAS_EXTRACTS,
- tableau_constant.EXTRACT_LAST_REFRESH_TIME,
- tableau_constant.EXTRACT_LAST_INCREMENTAL_UPDATE_TIME,
- tableau_constant.EXTRACT_LAST_UPDATE_TIME,
+ c.HAS_EXTRACTS,
+ c.EXTRACT_LAST_REFRESH_TIME,
+ c.EXTRACT_LAST_INCREMENTAL_UPDATE_TIME,
+ c.EXTRACT_LAST_UPDATE_TIME,
],
),
)
dataset_snapshot.aspects.append(dataset_props)
# Upstream Tables
- if datasource.get(tableau_constant.UPSTREAM_TABLES) or datasource.get(
- tableau_constant.UPSTREAM_DATA_SOURCES
- ):
+ if datasource.get(c.UPSTREAM_TABLES) or datasource.get(c.UPSTREAM_DATA_SOURCES):
# datasource -> db table relations
(
upstream_tables,
@@ -1779,13 +1730,13 @@ def emit_datasource(
)
yield self.get_metadata_change_proposal(
datasource_urn,
- aspect_name=tableau_constant.UPSTREAM_LINEAGE,
+ aspect_name=c.UPSTREAM_LINEAGE,
aspect=upstream_lineage,
)
# Datasource Fields
schema_metadata = self._get_schema_metadata_for_datasource(
- datasource.get(tableau_constant.FIELDS, [])
+ datasource.get(c.FIELDS, [])
)
if schema_metadata is not None:
dataset_snapshot.aspects.append(schema_metadata)
@@ -1793,7 +1744,7 @@ def emit_datasource(
yield self.get_metadata_change_event(dataset_snapshot)
yield self.get_metadata_change_proposal(
dataset_snapshot.urn,
- aspect_name=tableau_constant.SUB_TYPES,
+ aspect_name=c.SUB_TYPES,
aspect=SubTypesClass(
typeNames=(
["Embedded Data Source"]
@@ -1809,7 +1760,7 @@ def emit_datasource(
if container_key is not None:
yield from add_entity_to_container(
container_key,
- tableau_constant.DATASET,
+ c.DATASET,
dataset_snapshot.urn,
)
@@ -1822,10 +1773,10 @@ def _get_datasource_container_key(
container_key: Optional[ContainerKey] = None
if is_embedded_ds: # It is embedded then parent is container is workbook
if workbook is not None:
- container_key = self.gen_workbook_key(workbook[tableau_constant.ID])
+ container_key = self.gen_workbook_key(workbook[c.ID])
else:
logger.warning(
- f"Parent container not set for embedded datasource {datasource[tableau_constant.ID]}"
+ f"Parent container not set for embedded datasource {datasource[c.ID]}"
)
else:
parent_project_luid = self._get_published_datasource_project_luid(
@@ -1836,17 +1787,19 @@ def _get_datasource_container_key(
container_key = self.gen_project_key(parent_project_luid)
else:
logger.warning(
- f"Parent container not set for published datasource {datasource[tableau_constant.ID]}"
+ f"Parent container not set for published datasource {datasource[c.ID]}"
)
return container_key
def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]:
- datasource_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.datasource_ids_being_used)}"
+ datasource_filter = (
+ f"{c.ID_WITH_IN}: {json.dumps(self.datasource_ids_being_used)}"
+ )
for datasource in self.get_connection_objects(
published_datasource_graphql_query,
- tableau_constant.PUBLISHED_DATA_SOURCES_CONNECTION,
+ c.PUBLISHED_DATA_SOURCES_CONNECTION,
datasource_filter,
):
yield from self.emit_datasource(datasource)
@@ -1855,11 +1808,13 @@ def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
database_table_id_to_urn_map: Dict[str, str] = dict()
for urn, tbl in self.database_tables.items():
database_table_id_to_urn_map[tbl.id] = urn
- tables_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(list(database_table_id_to_urn_map.keys()))}"
+ tables_filter = (
+ f"{c.ID_WITH_IN}: {json.dumps(list(database_table_id_to_urn_map.keys()))}"
+ )
for table in self.get_connection_objects(
database_tables_graphql_query,
- tableau_constant.DATABASE_TABLES_CONNECTION,
+ c.DATABASE_TABLES_CONNECTION,
tables_filter,
):
yield from self.emit_table(table, database_table_id_to_urn_map)
@@ -1867,11 +1822,9 @@ def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
def emit_table(
self, table: dict, database_table_id_to_urn_map: Dict[str, str]
) -> Iterable[MetadataWorkUnit]:
- database_table = self.database_tables[
- database_table_id_to_urn_map[table[tableau_constant.ID]]
- ]
- columns = table.get(tableau_constant.COLUMNS, [])
- is_embedded = table.get(tableau_constant.IS_EMBEDDED) or False
+ database_table = self.database_tables[database_table_id_to_urn_map[table[c.ID]]]
+ columns = table.get(c.COLUMNS, [])
+ is_embedded = table.get(c.IS_EMBEDDED) or False
if not is_embedded and not self.config.ingest_tables_external:
logger.debug(
f"Skipping external table {database_table.urn} as ingest_tables_external is set to False"
@@ -1907,21 +1860,19 @@ def get_schema_metadata_for_table(
if columns:
fields = []
for field in columns:
- if field.get(tableau_constant.NAME) is None:
+ if field.get(c.NAME) is None:
self.report.num_table_field_skipped_no_name += 1
logger.warning(
- f"Skipping field {field[tableau_constant.ID]} from schema since its name is none"
+ f"Skipping field {field[c.ID]} from schema since its name is none"
)
continue
- nativeDataType = field.get(
- tableau_constant.REMOTE_TYPE, tableau_constant.UNKNOWN
- )
+ nativeDataType = field.get(c.REMOTE_TYPE, c.UNKNOWN)
TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
schema_field = SchemaField(
- fieldPath=field[tableau_constant.NAME],
+ fieldPath=field[c.NAME],
type=SchemaFieldDataType(type=TypeClass()),
- description=field.get(tableau_constant.DESCRIPTION),
+ description=field.get(c.DESCRIPTION),
nativeDataType=nativeDataType,
)
@@ -1941,11 +1892,9 @@ def get_schema_metadata_for_table(
def get_sheetwise_upstream_datasources(self, sheet: dict) -> set:
sheet_upstream_datasources = set()
- for field in sheet.get(tableau_constant.DATA_SOURCE_FIELDS) or []:
- if field and field.get(tableau_constant.DATA_SOURCE):
- sheet_upstream_datasources.add(
- field[tableau_constant.DATA_SOURCE][tableau_constant.ID]
- )
+ for field in sheet.get(c.DATA_SOURCE_FIELDS) or []:
+ if field and field.get(c.DATA_SOURCE):
+ sheet_upstream_datasources.add(field[c.DATA_SOURCE][c.ID])
return sheet_upstream_datasources
@@ -1961,20 +1910,20 @@ def _create_datahub_chart_usage_stat(
def _get_chart_stat_wu(
self, sheet: dict, sheet_urn: str
) -> Optional[MetadataWorkUnit]:
- luid: Optional[str] = sheet.get(tableau_constant.LUID)
+ luid: Optional[str] = sheet.get(c.LUID)
if luid is None:
logger.debug(
"stat:luid is none for sheet %s(id:%s)",
- sheet.get(tableau_constant.NAME),
- sheet.get(tableau_constant.ID),
+ sheet.get(c.NAME),
+ sheet.get(c.ID),
)
return None
usage_stat: Optional[UsageStat] = self.tableau_stat_registry.get(luid)
if usage_stat is None:
logger.debug(
"stat:UsageStat is not available in tableau_stat_registry for sheet %s(id:%s)",
- sheet.get(tableau_constant.NAME),
- sheet.get(tableau_constant.ID),
+ sheet.get(c.NAME),
+ sheet.get(c.ID),
)
return None
@@ -1983,8 +1932,8 @@ def _get_chart_stat_wu(
)
logger.debug(
"stat: Chart usage stat work unit is created for %s(id:%s)",
- sheet.get(tableau_constant.NAME),
- sheet.get(tableau_constant.ID),
+ sheet.get(c.NAME),
+ sheet.get(c.ID),
)
return MetadataChangeProposalWrapper(
aspect=aspect,
@@ -1992,22 +1941,20 @@ def _get_chart_stat_wu(
).as_workunit()
def emit_sheets(self) -> Iterable[MetadataWorkUnit]:
- sheets_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.sheet_ids)}"
+ sheets_filter = f"{c.ID_WITH_IN}: {json.dumps(self.sheet_ids)}"
for sheet in self.get_connection_objects(
sheet_graphql_query,
- tableau_constant.SHEETS_CONNECTION,
+ c.SHEETS_CONNECTION,
sheets_filter,
):
- yield from self.emit_sheets_as_charts(
- sheet, sheet.get(tableau_constant.WORKBOOK)
- )
+ yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
def emit_sheets_as_charts(
self, sheet: dict, workbook: Optional[Dict]
) -> Iterable[MetadataWorkUnit]:
sheet_urn: str = builder.make_chart_urn(
- self.platform, sheet[tableau_constant.ID], self.config.platform_instance
+ self.platform, sheet[c.ID], self.config.platform_instance
)
chart_snapshot = ChartSnapshot(
urn=sheet_urn,
@@ -2015,34 +1962,32 @@ def emit_sheets_as_charts(
)
creator: Optional[str] = None
- if workbook is not None and workbook.get(tableau_constant.OWNER) is not None:
- creator = workbook[tableau_constant.OWNER].get(tableau_constant.USERNAME)
- created_at = sheet.get(tableau_constant.CREATED_AT, datetime.now())
- updated_at = sheet.get(tableau_constant.UPDATED_AT, datetime.now())
+ if workbook is not None and workbook.get(c.OWNER) is not None:
+ creator = workbook[c.OWNER].get(c.USERNAME)
+ created_at = sheet.get(c.CREATED_AT, datetime.now())
+ updated_at = sheet.get(c.UPDATED_AT, datetime.now())
last_modified = self.get_last_modified(creator, created_at, updated_at)
- if sheet.get(tableau_constant.PATH):
+ if sheet.get(c.PATH):
site_part = f"/site/{self.config.site}" if self.config.site else ""
- sheet_external_url = f"{self.config.connect_uri}/#{site_part}/views/{sheet.get(tableau_constant.PATH)}"
- elif (
- sheet.get(tableau_constant.CONTAINED_IN_DASHBOARDS) is not None
- and len(sheet[tableau_constant.CONTAINED_IN_DASHBOARDS]) > 0
- and sheet[tableau_constant.CONTAINED_IN_DASHBOARDS][0] is not None
- and sheet[tableau_constant.CONTAINED_IN_DASHBOARDS][0].get(
- tableau_constant.PATH
+ sheet_external_url = (
+ f"{self.config.connect_uri}/#{site_part}/views/{sheet.get(c.PATH)}"
)
+ elif (
+ sheet.get(c.CONTAINED_IN_DASHBOARDS) is not None
+ and len(sheet[c.CONTAINED_IN_DASHBOARDS]) > 0
+ and sheet[c.CONTAINED_IN_DASHBOARDS][0] is not None
+ and sheet[c.CONTAINED_IN_DASHBOARDS][0].get(c.PATH)
):
# sheet contained in dashboard
site_part = f"/t/{self.config.site}" if self.config.site else ""
- dashboard_path = sheet[tableau_constant.CONTAINED_IN_DASHBOARDS][0][
- tableau_constant.PATH
- ]
- sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get(tableau_constant.NAME, '')}"
+ dashboard_path = sheet[c.CONTAINED_IN_DASHBOARDS][0][c.PATH]
+ sheet_external_url = f"{self.config.connect_uri}{site_part}/authoring/{dashboard_path}/{sheet.get(c.NAME, '')}"
else:
# hidden or viz-in-tooltip sheet
sheet_external_url = None
input_fields: List[InputField] = []
- if sheet.get(tableau_constant.DATA_SOURCE_FIELDS):
+ if sheet.get(c.DATA_SOURCE_FIELDS):
self.populate_sheet_upstream_fields(sheet, input_fields)
# datasource urn
@@ -2060,15 +2005,13 @@ def emit_sheets_as_charts(
# Chart Info
chart_info = ChartInfoClass(
description="",
- title=sheet.get(tableau_constant.NAME) or "",
+ title=sheet.get(c.NAME) or "",
lastModified=last_modified,
externalUrl=sheet_external_url
if self.config.ingest_external_links_for_charts
else None,
inputs=sorted(datasource_urn),
- customProperties=self.get_custom_props_from_dict(
- sheet, [tableau_constant.LUID]
- ),
+ customProperties=self.get_custom_props_from_dict(sheet, [c.LUID]),
)
chart_snapshot.aspects.append(chart_info)
# chart_snapshot doesn't support the stat aspect as list element and hence need to emit MCP
@@ -2083,7 +2026,7 @@ def emit_sheets_as_charts(
chart_snapshot.aspects.append(browse_paths)
else:
logger.warning(
- f"Could not set browse path for workbook {sheet[tableau_constant.ID]}. Please check permissions."
+ f"Could not set browse path for workbook {sheet[c.ID]}. Please check permissions."
)
# Ownership
@@ -2107,9 +2050,7 @@ def emit_sheets_as_charts(
)
if workbook is not None:
yield from add_entity_to_container(
- self.gen_workbook_key(workbook[tableau_constant.ID]),
- tableau_constant.CHART,
- chart_snapshot.urn,
+ self.gen_workbook_key(workbook[c.ID]), c.CHART, chart_snapshot.urn
)
if input_fields:
@@ -2134,14 +2075,12 @@ def _get_project_path(self, project: TableauProject) -> str:
def populate_sheet_upstream_fields(
self, sheet: dict, input_fields: List[InputField]
) -> None:
- for field in sheet.get(tableau_constant.DATA_SOURCE_FIELDS): # type: ignore
+ for field in sheet.get(c.DATA_SOURCE_FIELDS): # type: ignore
if not field:
continue
- name = field.get(tableau_constant.NAME)
+ name = field.get(c.NAME)
upstream_ds_id = (
- field.get(tableau_constant.DATA_SOURCE)[tableau_constant.ID]
- if field.get(tableau_constant.DATA_SOURCE)
- else None
+ field.get(c.DATA_SOURCE)[c.ID] if field.get(c.DATA_SOURCE) else None
)
if name and upstream_ds_id:
input_fields.append(
@@ -2162,10 +2101,8 @@ def populate_sheet_upstream_fields(
)
def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
- workbook_container_key = self.gen_workbook_key(workbook[tableau_constant.ID])
- creator = workbook.get(tableau_constant.OWNER, {}).get(
- tableau_constant.USERNAME
- )
+ workbook_container_key = self.gen_workbook_key(workbook[c.ID])
+ creator = workbook.get(c.OWNER, {}).get(c.USERNAME)
owner_urn = (
builder.make_user_urn(creator)
@@ -2191,17 +2128,17 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
if project_luid and project_luid in self.tableau_project_registry.keys():
parent_key = self.gen_project_key(project_luid)
else:
- workbook_id: Optional[str] = workbook.get(tableau_constant.ID)
- workbook_name: Optional[str] = workbook.get(tableau_constant.NAME)
+ workbook_id: Optional[str] = workbook.get(c.ID)
+ workbook_name: Optional[str] = workbook.get(c.NAME)
logger.warning(
f"Could not load project hierarchy for workbook {workbook_name}({workbook_id}). Please check permissions."
)
yield from gen_containers(
container_key=workbook_container_key,
- name=workbook.get(tableau_constant.NAME) or "",
+ name=workbook.get(c.NAME) or "",
parent_container_key=parent_key,
- description=workbook.get(tableau_constant.DESCRIPTION),
+ description=workbook.get(c.DESCRIPTION),
sub_types=[BIContainerSubTypes.TABLEAU_WORKBOOK],
owner_urn=owner_urn,
external_url=workbook_external_url,
@@ -2237,20 +2174,20 @@ def _create_datahub_dashboard_usage_stat(
def _get_dashboard_stat_wu(
self, dashboard: dict, dashboard_urn: str
) -> Optional[MetadataWorkUnit]:
- luid: Optional[str] = dashboard.get(tableau_constant.LUID)
+ luid: Optional[str] = dashboard.get(c.LUID)
if luid is None:
logger.debug(
"stat:luid is none for dashboard %s(id:%s)",
- dashboard.get(tableau_constant.NAME),
- dashboard.get(tableau_constant.ID),
+ dashboard.get(c.NAME),
+ dashboard.get(c.ID),
)
return None
usage_stat: Optional[UsageStat] = self.tableau_stat_registry.get(luid)
if usage_stat is None:
logger.debug(
"stat:UsageStat is not available in tableau_stat_registry for dashboard %s(id:%s)",
- dashboard.get(tableau_constant.NAME),
- dashboard.get(tableau_constant.ID),
+ dashboard.get(c.NAME),
+ dashboard.get(c.ID),
)
return None
@@ -2259,8 +2196,8 @@ def _get_dashboard_stat_wu(
)
logger.debug(
"stat: Dashboard usage stat is created for %s(id:%s)",
- dashboard.get(tableau_constant.NAME),
- dashboard.get(tableau_constant.ID),
+ dashboard.get(c.NAME),
+ dashboard.get(c.ID),
)
return MetadataChangeProposalWrapper(
@@ -2288,26 +2225,20 @@ def new_work_unit(self, mcp: MetadataChangeProposalWrapper) -> MetadataWorkUnit:
)
def emit_dashboards(self) -> Iterable[MetadataWorkUnit]:
- dashboards_filter = (
- f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.dashboard_ids)}"
- )
+ dashboards_filter = f"{c.ID_WITH_IN}: {json.dumps(self.dashboard_ids)}"
for dashboard in self.get_connection_objects(
dashboard_graphql_query,
- tableau_constant.DASHBOARDS_CONNECTION,
+ c.DASHBOARDS_CONNECTION,
dashboards_filter,
):
- yield from self.emit_dashboard(
- dashboard, dashboard.get(tableau_constant.WORKBOOK)
- )
+ yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
def get_tags(self, obj: dict) -> Optional[List[str]]:
- tag_list = obj.get(tableau_constant.TAGS, [])
+ tag_list = obj.get(c.TAGS, [])
if tag_list and self.config.ingest_tags:
tag_list_str = [
- t[tableau_constant.NAME]
- for t in tag_list
- if t is not None and t.get(tableau_constant.NAME)
+ t[c.NAME] for t in tag_list if t is not None and t.get(c.NAME)
]
return tag_list_str
@@ -2317,7 +2248,7 @@ def emit_dashboard(
self, dashboard: dict, workbook: Optional[Dict]
) -> Iterable[MetadataWorkUnit]:
dashboard_urn: str = builder.make_dashboard_urn(
- self.platform, dashboard[tableau_constant.ID], self.config.platform_instance
+ self.platform, dashboard[c.ID], self.config.platform_instance
)
dashboard_snapshot = DashboardSnapshot(
urn=dashboard_urn,
@@ -2325,26 +2256,28 @@ def emit_dashboard(
)
creator: Optional[str] = None
- if workbook is not None and workbook.get(tableau_constant.OWNER) is not None:
- creator = workbook[tableau_constant.OWNER].get(tableau_constant.USERNAME)
- created_at = dashboard.get(tableau_constant.CREATED_AT, datetime.now())
- updated_at = dashboard.get(tableau_constant.UPDATED_AT, datetime.now())
+ if workbook is not None and workbook.get(c.OWNER) is not None:
+ creator = workbook[c.OWNER].get(c.USERNAME)
+ created_at = dashboard.get(c.CREATED_AT, datetime.now())
+ updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
last_modified = self.get_last_modified(creator, created_at, updated_at)
site_part = f"/site/{self.config.site}" if self.config.site else ""
- dashboard_external_url = f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get(tableau_constant.PATH, '')}"
+ dashboard_external_url = (
+ f"{self.config.connect_uri}/#{site_part}/views/{dashboard.get(c.PATH, '')}"
+ )
title = (
- dashboard[tableau_constant.NAME].replace("/", REPLACE_SLASH_CHAR)
- if dashboard.get(tableau_constant.NAME)
+ dashboard[c.NAME].replace("/", REPLACE_SLASH_CHAR)
+ if dashboard.get(c.NAME)
else ""
)
chart_urns = [
builder.make_chart_urn(
self.platform,
- sheet.get(tableau_constant.ID),
+ sheet.get(c.ID),
self.config.platform_instance,
)
- for sheet in dashboard.get(tableau_constant.SHEETS, [])
+ for sheet in dashboard.get(c.SHEETS, [])
]
dashboard_info_class = DashboardInfoClass(
description="",
@@ -2354,9 +2287,7 @@ def emit_dashboard(
dashboardUrl=dashboard_external_url
if self.config.ingest_external_links_for_dashboards
else None,
- customProperties=self.get_custom_props_from_dict(
- dashboard, [tableau_constant.LUID]
- ),
+ customProperties=self.get_custom_props_from_dict(dashboard, [c.LUID]),
)
dashboard_snapshot.aspects.append(dashboard_info_class)
@@ -2377,7 +2308,7 @@ def emit_dashboard(
dashboard_snapshot.aspects.append(browse_paths)
else:
logger.warning(
- f"Could not set browse path for dashboard {dashboard[tableau_constant.ID]}. Please check permissions."
+ f"Could not set browse path for dashboard {dashboard[c.ID]}. Please check permissions."
)
# Ownership
@@ -2397,8 +2328,8 @@ def emit_dashboard(
if workbook is not None:
yield from add_entity_to_container(
- self.gen_workbook_key(workbook[tableau_constant.ID]),
- tableau_constant.DASHBOARD,
+ self.gen_workbook_key(workbook[c.ID]),
+ c.DASHBOARD,
dashboard_snapshot.urn,
)
@@ -2406,38 +2337,40 @@ def get_browse_paths_aspect(
self, workbook: Optional[Dict]
) -> Optional[BrowsePathsClass]:
browse_paths: Optional[BrowsePathsClass] = None
- if workbook and workbook.get(tableau_constant.NAME):
+ if workbook and workbook.get(c.NAME):
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
if project_luid in self.tableau_project_registry:
browse_paths = BrowsePathsClass(
paths=[
f"/{self.platform}/{self._project_luid_to_browse_path_name(project_luid)}"
- f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}"
+ f"/{workbook[c.NAME].replace('/', REPLACE_SLASH_CHAR)}"
]
)
- elif workbook.get(tableau_constant.PROJECT_NAME):
+ elif workbook.get(c.PROJECT_NAME):
# browse path
browse_paths = BrowsePathsClass(
paths=[
- f"/{self.platform}/{workbook[tableau_constant.PROJECT_NAME].replace('/', REPLACE_SLASH_CHAR)}"
- f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}"
+ f"/{self.platform}/{workbook[c.PROJECT_NAME].replace('/', REPLACE_SLASH_CHAR)}"
+ f"/{workbook[c.NAME].replace('/', REPLACE_SLASH_CHAR)}"
]
)
return browse_paths
def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]:
- datasource_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.embedded_datasource_ids_being_used)}"
+ datasource_filter = (
+ f"{c.ID_WITH_IN}: {json.dumps(self.embedded_datasource_ids_being_used)}"
+ )
for datasource in self.get_connection_objects(
embedded_datasource_graphql_query,
- tableau_constant.EMBEDDED_DATA_SOURCES_CONNECTION,
+ c.EMBEDDED_DATA_SOURCES_CONNECTION,
datasource_filter,
):
yield from self.emit_datasource(
datasource,
- datasource.get(tableau_constant.WORKBOOK),
+ datasource.get(c.WORKBOOK),
is_embedded_ds=True,
)
@@ -2483,7 +2416,7 @@ def emit_project_containers(self) -> Iterable[MetadataWorkUnit]:
container_key=self.gen_project_key(_id),
name=project.name,
description=project.description,
- sub_types=[tableau_constant.PROJECT],
+ sub_types=[c.PROJECT],
parent_container_key=self.gen_project_key(project.parent_id)
if project.parent_id
else None,
@@ -2498,7 +2431,7 @@ def emit_project_containers(self) -> Iterable[MetadataWorkUnit]:
yield from gen_containers(
container_key=self.gen_project_key(project.parent_id),
name=cast(str, project.parent_name),
- sub_types=[tableau_constant.PROJECT],
+ sub_types=[c.PROJECT],
)
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py
index 7c4852042ce7c..65d779b7f4516 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py
@@ -8,7 +8,7 @@
import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
-from datahub.ingestion.source import tableau_constant as tc
+from datahub.ingestion.source import tableau_constant as c
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
DatasetLineageType,
FineGrainedLineage,
@@ -591,12 +591,12 @@ def create(
cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None
) -> "TableauUpstreamReference":
# Values directly from `table` object from Tableau
- database = t_database = d.get(tc.DATABASE, {}).get(tc.NAME)
- schema = t_schema = d.get(tc.SCHEMA)
- table = t_table = d.get(tc.NAME) or ""
- t_full_name = d.get(tc.FULL_NAME)
- t_connection_type = d[tc.CONNECTION_TYPE] # required to generate urn
- t_id = d[tc.ID]
+ database = t_database = d.get(c.DATABASE, {}).get(c.NAME)
+ schema = t_schema = d.get(c.SCHEMA)
+ table = t_table = d.get(c.NAME) or ""
+ t_full_name = d.get(c.FULL_NAME)
+ t_connection_type = d[c.CONNECTION_TYPE] # required to generate urn
+ t_id = d[c.ID]
parsed_full_name = cls.parse_full_name(t_full_name)
if parsed_full_name and len(parsed_full_name) == 3:
From 9174301719122c2597db75c8bb6b60c4d1a74f77 Mon Sep 17 00:00:00 2001
From: sachinsaju <33017477+sachinsaju@users.noreply.github.com>
Date: Thu, 9 Nov 2023 10:37:09 +0530
Subject: [PATCH 079/792] docs: update broken link in metadata-modelling
(#9184)
Co-authored-by: Hyejin Yoon <0327jane@gmail.com>
Co-authored-by: John Joyce
---
docs/modeling/metadata-model.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/modeling/metadata-model.md b/docs/modeling/metadata-model.md
index a8958985a0a72..4c97cadc88417 100644
--- a/docs/modeling/metadata-model.md
+++ b/docs/modeling/metadata-model.md
@@ -625,7 +625,7 @@ curl --location --request POST 'http://localhost:8080/analytics?action=getTimese
}
}
```
-For more examples on the complex types of group-by/aggregations, refer to the tests in the group `getAggregatedStats` of [ElasticSearchTimeseriesAspectServiceTest.java](https://github.com/datahub-project/datahub/blob/master/metadata-io/src/test/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectServiceTest.java).
+For more examples on the complex types of group-by/aggregations, refer to the tests in the group `getAggregatedStats` of [TimeseriesAspectServiceTestBase.java](https://github.com/datahub-project/datahub/blob/master/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java).
From e494a9cc102f863bc51fcf80674bd6d3d36d726c Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Thu, 9 Nov 2023 00:23:17 -0500
Subject: [PATCH 080/792] test(): Test policy to create and manage privileges
(#9173)
---
.../tests/privileges/test_privileges.py | 112 +++++++++++++++++-
1 file changed, 111 insertions(+), 1 deletion(-)
diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py
index 740311754678e..d0f00734ae9f3 100644
--- a/smoke-test/tests/privileges/test_privileges.py
+++ b/smoke-test/tests/privileges/test_privileges.py
@@ -114,6 +114,21 @@ def _ensure_can_create_access_token(session, json):
assert ingestion_data["data"]["createAccessToken"]["__typename"] == "AccessToken"
+@tenacity.retry(
+ stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec)
+)
+def _ensure_can_create_user_policy(session, json):
+ response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json)
+ response.raise_for_status()
+ res_data = response.json()
+
+ assert res_data
+ assert res_data["data"]
+ assert res_data["data"]["createPolicy"] is not None
+
+ return res_data["data"]["createPolicy"]
+
+
@pytest.mark.dependency(depends=["test_healthchecks"])
def test_privilege_to_create_and_manage_secrets():
@@ -337,4 +352,99 @@ def test_privilege_to_create_and_manage_access_tokens():
# Ensure that user can't create access token after policy is removed
- _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken")
\ No newline at end of file
+ _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken")
+
+
+@pytest.mark.dependency(depends=["test_healthchecks"])
+def test_privilege_to_create_and_manage_policies():
+ """Check that MANAGE_POLICIES lets the "user" account create, edit and delete a policy, and that revoking it blocks creation again."""
+ (admin_user, admin_pass) = get_admin_credentials()
+ admin_session = login_as(admin_user, admin_pass)
+ user_session = login_as("user", "user")
+
+
+ # Verify new user can't create a policy
+ create_policy = {
+ "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n
+ createPolicy(input: $input) }""",
+ "variables": {
+ "input": {
+ "type": "PLATFORM",
+ "name": "Policy Name",
+ "description": "Policy Description",
+ "state": "ACTIVE",
+ "resources": {"filter":{"criteria":[]}},
+ "privileges": ["MANAGE_POLICIES"],
+ "actors": {
+ "users": [],
+ "resourceOwners": False,
+ "allUsers": True,
+ "allGroups": False,
+ },
+ }
+ },
+ }
+
+ _ensure_cant_perform_action(user_session, create_policy,"createPolicy")
+
+
+ # Assign privileges to the new user to create and manage policies
+ admin_policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_POLICIES"], admin_session)
+
+ # NOTE(review): creation below goes through the tenacity-retried helper, presumably because the new grant is not effective immediately
+ # Verify new user can create and manage policy(create, edit, delete)
+ # Create a policy
+ user_policy_urn = _ensure_can_create_user_policy(user_session, create_policy)
+
+ # Edit a policy
+ edit_policy = {
+ "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n
+ updatePolicy(urn: $urn, input: $input) }""",
+ "variables": {
+ "urn": user_policy_urn,
+ "input": {
+ "type": "PLATFORM",
+ "state": "INACTIVE",
+ "name": "Policy Name test",
+ "description": "Policy Description updated",
+ "privileges": ["MANAGE_POLICIES"],
+ "actors": {
+ "users": [],
+ "groups": None,
+ "resourceOwners": False,
+ "allUsers": True,
+ "allGroups": False,
+ "resourceOwnersTypes": None,
+ },
+ },
+ },
+ }
+ edit_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=edit_policy)
+ edit_policy_response.raise_for_status()
+ res_data = edit_policy_response.json()
+ # The test expects updatePolicy to answer with the urn of the policy that was edited
+ assert res_data
+ assert res_data["data"]
+ assert res_data["data"]["updatePolicy"] == user_policy_urn
+
+ # Delete a policy
+ remove_user_policy = {
+ "query": "mutation deletePolicy($urn: String!) {\n deletePolicy(urn: $urn)\n}\n",
+ "variables":{"urn":user_policy_urn}
+ }
+
+ remove_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_user_policy)
+ remove_policy_response.raise_for_status()
+ res_data = remove_policy_response.json()
+ # The test expects deletePolicy to answer with the urn of the policy that was removed
+ assert res_data
+ assert res_data["data"]
+ assert res_data["data"]["deletePolicy"] == user_policy_urn
+
+
+ # Remove the user privilege by admin
+ remove_policy(admin_policy_urn, admin_session)
+
+
+ # Ensure that user can't create a policy after privilege is removed by admin
+ _ensure_cant_perform_action(user_session, create_policy,"createPolicy")
\ No newline at end of file
From 2187d24b54493953ab66b70f9a4b4fe0fd8841e1 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Thu, 9 Nov 2023 13:58:12 -0600
Subject: [PATCH 081/792] docs(security): add security doc to website (#9209)
---
docs-website/generateDocsDir.ts | 1 -
docs-website/sidebars.js | 1 +
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts
index a321146e10efa..e19f09530665a 100644
--- a/docs-website/generateDocsDir.ts
+++ b/docs-website/generateDocsDir.ts
@@ -125,7 +125,6 @@ function list_markdown_files(): string[] {
/^docker\/(?!README|datahub-upgrade|airflow\/local_airflow)/, // Drop all but a few docker docs.
/^docs\/docker\/README\.md/, // This one is just a pointer to another file.
/^docs\/README\.md/, // This one is just a pointer to the hosted docs site.
- /^SECURITY\.md$/,
/^\s*$/, //Empty string
];
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index 9cc035f3e29e0..4d2420256ebff 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -546,6 +546,7 @@ module.exports = {
"docs/CONTRIBUTING",
"docs/links",
"docs/rfc",
+ "SECURITY",
],
},
{
From 5911a7b45ed726292b2aa77c9e307d0e8683603a Mon Sep 17 00:00:00 2001
From: sachinsaju <33017477+sachinsaju@users.noreply.github.com>
Date: Fri, 10 Nov 2023 01:54:53 +0530
Subject: [PATCH 082/792] docs(java-sdk-dataset): add dataset via java sdk
example (#9136)
Co-authored-by: Hyejin Yoon <0327jane@gmail.com>
---
docs/api/tutorials/datasets.md | 7 ++
.../datahubproject/examples/DatasetAdd.java | 84 +++++++++++++++++++
2 files changed, 91 insertions(+)
create mode 100644 metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetAdd.java
diff --git a/docs/api/tutorials/datasets.md b/docs/api/tutorials/datasets.md
index 7c6d4a88d4190..39b0fdce1bdb5 100644
--- a/docs/api/tutorials/datasets.md
+++ b/docs/api/tutorials/datasets.md
@@ -28,6 +28,13 @@ For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.
> 🚫 Creating a dataset via `graphql` is currently not supported.
> Please check out [API feature comparison table](/docs/api/datahub-apis.md#datahub-api-comparison) for more information.
+</TabItem>
+<TabItem value="java" label="Java">
+
+```java
+{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetAdd.java show_path_as_comment }}
+```
+
diff --git a/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetAdd.java b/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetAdd.java
new file mode 100644
index 0000000000000..ac368972e8dc9
--- /dev/null
+++ b/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/DatasetAdd.java
@@ -0,0 +1,84 @@
+package io.datahubproject.examples;
+
+import com.linkedin.common.AuditStamp;
+import com.linkedin.common.urn.CorpuserUrn;
+import com.linkedin.common.urn.DataPlatformUrn;
+import com.linkedin.common.urn.DatasetUrn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.schema.DateType;
+import com.linkedin.schema.OtherSchema;
+import com.linkedin.schema.SchemaField;
+import com.linkedin.schema.SchemaFieldArray;
+import com.linkedin.schema.SchemaFieldDataType;
+import com.linkedin.schema.SchemaMetadata;
+import com.linkedin.schema.StringType;
+import datahub.client.MetadataWriteResponse;
+import datahub.client.rest.RestEmitter;
+import datahub.event.MetadataChangeProposalWrapper;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+public class DatasetAdd {
+  private DatasetAdd() {
+    // Not meant to be instantiated; the example runs through main().
+  }
+
+  /** Builds a three-field SchemaMetadata aspect for a hive dataset and upserts it via http://localhost:8080. */
+  public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
+    DatasetUrn datasetUrn = UrnUtils.toDatasetUrn("hive", "fct_users_deleted", "PROD");
+    CorpuserUrn userUrn = new CorpuserUrn("ingestion");
+    AuditStamp lastModified = new AuditStamp().setTime(1640692800000L).setActor(userUrn);
+
+    SchemaMetadata schemaMetadata = new SchemaMetadata()
+        .setSchemaName("customer")
+        .setPlatform(new DataPlatformUrn("hive"))
+        .setVersion(0L)
+        .setHash("")
+        .setPlatformSchema(SchemaMetadata.PlatformSchema.create(new OtherSchema().setRawSchema("__insert raw schema here__")))
+        .setLastModified(lastModified);
+
+    SchemaFieldArray fields = new SchemaFieldArray();
+
+    SchemaField field1 = new SchemaField()
+        .setFieldPath("address.zipcode")
+        .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())))
+        .setNativeDataType("VARCHAR(50)")
+        .setDescription("This is the zipcode of the address. Specified using extended form and limited to addresses in the United States")
+        .setLastModified(lastModified);
+    fields.add(field1);
+
+    SchemaField field2 = new SchemaField().setFieldPath("address.street")
+        .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())))
+        .setNativeDataType("VARCHAR(100)")
+        .setDescription("Street corresponding to the address")
+        .setLastModified(lastModified);
+    fields.add(field2);
+
+    SchemaField field3 = new SchemaField().setFieldPath("last_sold_date")
+        .setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new DateType())))
+        .setNativeDataType("Date")
+        .setDescription("Date of the last sale date for this property")
+        .setLastModified(lastModified);
+    fields.add(field3);
+
+    schemaMetadata.setFields(fields);
+
+    MetadataChangeProposalWrapper mcpw = MetadataChangeProposalWrapper.builder()
+        .entityType("dataset")
+        .entityUrn(datasetUrn)
+        .upsert()
+        .aspect(schemaMetadata)
+        .build();
+
+    String token = "";  // NOTE(review): empty as shipped; presumably set to a real access token when the server requires one
+    RestEmitter emitter = RestEmitter.create(
+        b -> b.server("http://localhost:8080")
+            .token(token)
+    );
+    Future<MetadataWriteResponse> response = emitter.emit(mcpw, null);
+    System.out.println(response.get().getResponseContent());  // get() waits for the emit result
+  }
+
+}
\ No newline at end of file
From d6cb106fab4a4d49193afd0efd8ff7d90a8d3fa8 Mon Sep 17 00:00:00 2001
From: sachinsaju <33017477+sachinsaju@users.noreply.github.com>
Date: Fri, 10 Nov 2023 02:10:55 +0530
Subject: [PATCH 083/792] doc(java-sdk-example):example to create tag via
java-sdk (#9151)
---
docs/api/tutorials/tags.md | 8 ++++
.../io/datahubproject/examples/TagCreate.java | 40 +++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 metadata-integration/java/examples/src/main/java/io/datahubproject/examples/TagCreate.java
diff --git a/docs/api/tutorials/tags.md b/docs/api/tutorials/tags.md
index b2234bf00bcb9..24d583dc26dac 100644
--- a/docs/api/tutorials/tags.md
+++ b/docs/api/tutorials/tags.md
@@ -78,6 +78,14 @@ Expected Response:
+<TabItem value="java" label="Java">
+
+```java
+{{ inline /metadata-integration/java/examples/src/main/java/io/datahubproject/examples/TagCreate.java show_path_as_comment }}
+```
+
+</TabItem>
+
```python
diff --git a/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/TagCreate.java b/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/TagCreate.java
new file mode 100644
index 0000000000000..077489a9e02d9
--- /dev/null
+++ b/metadata-integration/java/examples/src/main/java/io/datahubproject/examples/TagCreate.java
@@ -0,0 +1,40 @@
+package io.datahubproject.examples;
+
+import com.linkedin.tag.TagProperties;
+import datahub.client.MetadataWriteResponse;
+import datahub.client.rest.RestEmitter;
+import datahub.event.MetadataChangeProposalWrapper;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+public class TagCreate {
+
+  private TagCreate() {
+    // Not meant to be instantiated; the example runs through main().
+  }
+
+  /** Upserts a TagProperties aspect for urn:li:tag:deprecated via http://localhost:8080. */
+  public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
+    TagProperties tagProperties = new TagProperties()
+        .setName("Deprecated")
+        .setDescription("Having this tag means this column or table is deprecated.");
+
+    MetadataChangeProposalWrapper mcpw = MetadataChangeProposalWrapper.builder()
+        .entityType("tag")
+        .entityUrn("urn:li:tag:deprecated")
+        .upsert()
+        .aspect(tagProperties)
+        .build();
+
+    String token = "";
+    RestEmitter emitter = RestEmitter.create(
+        b -> b.server("http://localhost:8080")
+            .token(token)
+    );
+    // emit() hands back a Future; get() below waits for the write response.
+    Future<MetadataWriteResponse> response = emitter.emit(mcpw, null);
+    System.out.println(response.get().getResponseContent());
+  }
+}
From 107713846f56e761011fd811fd8ac3b0b87a40bd Mon Sep 17 00:00:00 2001
From: Teppo Naakka
Date: Fri, 10 Nov 2023 02:48:06 +0200
Subject: [PATCH 084/792] fix(ingest/powerbi): use dataset workspace id as key
for parent container (#8994)
---
.../ingestion/source/powerbi/powerbi.py | 42 +-
.../powerbi/golden_test_container.json | 1089 +++++++++++++----
..._config_and_modified_since_admin_only.json | 210 +++-
.../tests/integration/powerbi/test_powerbi.py | 4 +
4 files changed, 1004 insertions(+), 341 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index 4611a8eed4782..dc4394efcf245 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -4,7 +4,7 @@
#
#########################################################
import logging
-from typing import Iterable, List, Optional, Set, Tuple, Union
+from typing import Iterable, List, Optional, Tuple, Union
import datahub.emitter.mce_builder as builder
import datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes as powerbi_data_classes
@@ -110,8 +110,7 @@ def __init__(
self.__config = config
self.__reporter = reporter
self.__dataplatform_instance_resolver = dataplatform_instance_resolver
- self.processed_datasets: Set[powerbi_data_classes.PowerBIDataset] = set()
- self.workspace_key: ContainerKey
+ self.workspace_key: Optional[ContainerKey] = None
@staticmethod
def urn_to_lowercase(value: str, flag: bool) -> str:
@@ -374,6 +373,9 @@ def to_datahub_dataset(
f"Mapping dataset={dataset.name}(id={dataset.id}) to datahub dataset"
)
+ if self.__config.extract_datasets_to_containers:
+ dataset_mcps.extend(self.generate_container_for_dataset(dataset))
+
for table in dataset.tables:
# Create a URN for dataset
ds_urn = builder.make_dataset_urn_with_platform_instance(
@@ -461,7 +463,6 @@ def to_datahub_dataset(
self.append_container_mcp(
dataset_mcps,
- workspace,
ds_urn,
dataset,
)
@@ -473,8 +474,6 @@ def to_datahub_dataset(
dataset.tags,
)
- self.processed_datasets.add(dataset)
-
return dataset_mcps
@staticmethod
@@ -572,7 +571,6 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict:
self.append_container_mcp(
result_mcps,
- workspace,
chart_urn,
)
@@ -695,7 +693,6 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict:
self.append_container_mcp(
list_of_mcps,
- workspace,
dashboard_urn,
)
@@ -711,7 +708,6 @@ def chart_custom_properties(dashboard: powerbi_data_classes.Dashboard) -> dict:
def append_container_mcp(
self,
list_of_mcps: List[MetadataChangeProposalWrapper],
- workspace: powerbi_data_classes.Workspace,
entity_urn: str,
dataset: Optional[powerbi_data_classes.PowerBIDataset] = None,
) -> None:
@@ -719,12 +715,8 @@ def append_container_mcp(
dataset, powerbi_data_classes.PowerBIDataset
):
container_key = dataset.get_dataset_key(self.__config.platform_name)
- elif self.__config.extract_workspaces_to_containers:
- container_key = workspace.get_workspace_key(
- platform_name=self.__config.platform_name,
- platform_instance=self.__config.platform_instance,
- workspace_id_as_urn_part=self.__config.workspace_id_as_urn_part,
- )
+ elif self.__config.extract_workspaces_to_containers and self.workspace_key:
+ container_key = self.workspace_key
else:
return None
@@ -743,6 +735,7 @@ def generate_container_for_workspace(
) -> Iterable[MetadataWorkUnit]:
self.workspace_key = workspace.get_workspace_key(
platform_name=self.__config.platform_name,
+ platform_instance=self.__config.platform_instance,
workspace_id_as_urn_part=self.__config.workspace_id_as_urn_part,
)
container_work_units = gen_containers(
@@ -754,7 +747,7 @@ def generate_container_for_workspace(
def generate_container_for_dataset(
self, dataset: powerbi_data_classes.PowerBIDataset
- ) -> Iterable[MetadataWorkUnit]:
+ ) -> Iterable[MetadataChangeProposalWrapper]:
dataset_key = dataset.get_dataset_key(self.__config.platform_name)
container_work_units = gen_containers(
container_key=dataset_key,
@@ -762,7 +755,13 @@ def generate_container_for_dataset(
parent_container_key=self.workspace_key,
sub_types=[BIContainerSubTypes.POWERBI_DATASET],
)
- return container_work_units
+
+ # The if statement here is just to satisfy mypy
+ return [
+ wu.metadata
+ for wu in container_work_units
+ if isinstance(wu.metadata, MetadataChangeProposalWrapper)
+ ]
def append_tag_mcp(
self,
@@ -965,7 +964,6 @@ def to_chart_mcps(
self.append_container_mcp(
list_of_mcps,
- workspace,
chart_urn,
)
@@ -1086,7 +1084,6 @@ def report_to_dashboard(
self.append_container_mcp(
list_of_mcps,
- workspace,
dashboard_urn,
)
@@ -1220,10 +1217,6 @@ def validate_dataset_type_mapping(self):
f"Dataset lineage would get ingested for data-platform = {self.source_config.dataset_type_mapping}"
)
- def extract_datasets_as_containers(self):
- for dataset in self.mapper.processed_datasets:
- yield from self.mapper.generate_container_for_dataset(dataset)
-
def extract_independent_datasets(
self, workspace: powerbi_data_classes.Workspace
) -> Iterable[MetadataWorkUnit]:
@@ -1270,9 +1263,6 @@ def get_workspace_workunit(
):
yield work_unit
- if self.source_config.extract_datasets_to_containers:
- yield from self.extract_datasets_as_containers()
-
yield from self.extract_independent_datasets(workspace)
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json
index 850816bf80807..91b5499eaadcb 100644
--- a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json
+++ b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json
@@ -15,7 +15,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -30,7 +31,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -45,7 +47,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -62,7 +65,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -77,7 +81,44 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "containerProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "platform": "powerbi",
+ "dataset": "05169CD2-E713-41E6-9600-1D8066D95445"
+ },
+ "name": "library-dataset"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -94,7 +135,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -115,7 +157,79 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:powerbi"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "PowerBI Dataset"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
+ "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -130,7 +244,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -148,7 +263,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -158,12 +274,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -177,13 +294,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -200,7 +322,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -221,7 +344,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -236,7 +360,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -254,7 +379,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -264,12 +390,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -283,13 +410,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -306,7 +438,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -327,7 +460,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -342,7 +476,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -360,7 +495,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -370,12 +506,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -389,13 +526,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -412,7 +554,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -433,7 +576,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -448,7 +592,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -466,7 +611,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -476,12 +622,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -495,13 +642,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -518,7 +670,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -539,7 +692,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -554,7 +708,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -572,7 +727,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -582,12 +738,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -601,13 +758,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -624,7 +786,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -645,7 +808,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -660,7 +824,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -678,7 +843,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -688,12 +854,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -707,13 +874,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -730,7 +902,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -751,7 +924,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -766,7 +940,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -784,7 +959,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -794,12 +970,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -813,13 +990,54 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2",
+ "urn": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "containerProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "platform": "powerbi",
+ "dataset": "ba0130a1-5b03-40de-9535-b34e778ea6ed"
+ },
+ "name": "hr_pbi_test"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -836,7 +1054,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -857,7 +1076,79 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:powerbi"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "PowerBI Dataset"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
+ "urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -872,7 +1163,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -890,7 +1182,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -900,12 +1193,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -919,13 +1213,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -942,7 +1241,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -963,7 +1263,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -978,7 +1279,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -996,7 +1298,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1006,12 +1309,13 @@
"aspectName": "container",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "container": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1025,13 +1329,18 @@
{
"id": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9",
"urn": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ },
+ {
+ "id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
+ "urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
}
]
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1046,7 +1355,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1061,7 +1371,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1089,6 +1400,9 @@
}
},
"inputs": [
+ {
+ "string": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
+ },
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
},
@@ -1115,7 +1429,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1130,7 +1445,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1146,7 +1462,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1163,7 +1480,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1178,7 +1496,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1198,7 +1517,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1226,6 +1546,9 @@
}
},
"inputs": [
+ {
+ "string": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
+ },
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)"
},
@@ -1237,7 +1560,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1252,7 +1576,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1268,7 +1593,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1285,7 +1611,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1300,7 +1627,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1320,7 +1648,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1337,7 +1666,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1374,7 +1704,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1389,7 +1720,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1405,7 +1737,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1433,7 +1766,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1448,7 +1782,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1468,7 +1803,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1485,7 +1821,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1506,7 +1843,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1521,7 +1859,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1539,22 +1878,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1571,7 +1896,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1592,7 +1918,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1607,7 +1934,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1625,22 +1953,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1657,7 +1971,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1678,7 +1993,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1693,7 +2009,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1711,22 +2028,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1743,7 +2046,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1764,7 +2068,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1779,7 +2084,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1797,46 +2103,33 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
+ "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"changeType": "UPSERT",
- "aspectName": "container",
+ "aspectName": "viewProperties",
"aspect": {
"json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
+ "materialized": false,
+ "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source",
+ "viewLanguage": "m_query"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"changeType": "UPSERT",
- "aspectName": "viewProperties",
- "aspect": {
- "json": {
- "materialized": false,
- "viewLogic": "let\n Source = Value.NativeQuery(Snowflake.Databases(\"xaa48144.snowflakecomputing.com\",\"GSL_TEST_WH\",[Role=\"ACCOUNTADMIN\"]){[Name=\"GSL_TEST_DB\"]}[Data], \"select A.name from GSL_TEST_DB.PUBLIC.SALES_ANALYST as A inner join GSL_TEST_DB.PUBLIC.SALES_FORECAST as B on A.name = B.name where startswith(A.name, 'mo')\", null, [EnableFolding=true])\nin\n Source",
- "viewLanguage": "m_query"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
- "changeType": "UPSERT",
- "aspectName": "datasetProperties",
+ "aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {
@@ -1850,7 +2143,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1865,7 +2159,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1883,22 +2178,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1915,7 +2196,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1936,7 +2218,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1951,7 +2234,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1969,22 +2253,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2001,7 +2271,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2022,7 +2293,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2037,7 +2309,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2055,22 +2328,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
- }
-},
-{
- "entityType": "dataset",
- "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
- "changeType": "UPSERT",
- "aspectName": "container",
- "aspect": {
- "json": {
- "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
- }
- },
- "systemMetadata": {
- "lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2085,7 +2344,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2100,7 +2360,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2126,6 +2387,9 @@
}
},
"inputs": [
+ {
+ "string": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
+ },
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
},
@@ -2152,7 +2416,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2167,7 +2432,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2184,7 +2450,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2199,7 +2466,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2219,7 +2487,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2245,6 +2514,9 @@
}
},
"inputs": [
+ {
+ "string": "urn:li:container:6ac0662f0f2fc3a9196ac505da2182b2"
+ },
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
},
@@ -2271,7 +2543,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2286,7 +2559,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2303,7 +2577,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2318,7 +2593,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2338,7 +2614,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2355,7 +2632,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2388,7 +2666,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2403,7 +2682,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2419,7 +2699,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2436,7 +2717,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2464,7 +2746,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2479,7 +2762,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2499,7 +2783,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2514,7 +2799,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -2529,7 +2815,310 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User4@foo.com",
+ "changeType": "UPSERT",
+ "aspectName": "corpUserKey",
+ "aspect": {
+ "json": {
+ "username": "User4@foo.com"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User4@foo.com",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User3@foo.com",
+ "changeType": "UPSERT",
+ "aspectName": "corpUserKey",
+ "aspect": {
+ "json": {
+ "username": "User3@foo.com"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "corpuser",
+ "entityUrn": "urn:li:corpuser:users.User3@foo.com",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "changeType": "UPSERT",
+ "aspectName": "containerProperties",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "platform": "powerbi",
+ "workspace": "second-demo-workspace"
+ },
+ "name": "second-demo-workspace"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": []
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "changeType": "UPSERT",
+ "aspectName": "dataPlatformInstance",
+ "aspect": {
+ "json": {
+ "platform": "urn:li:dataPlatform:powerbi"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "container",
+ "entityUrn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "changeType": "UPSERT",
+ "aspectName": "subTypes",
+ "aspect": {
+ "json": {
+ "typeNames": [
+ "Workspace"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePaths",
+ "aspect": {
+ "json": {
+ "paths": [
+ "/powerbi/second-demo-workspace"
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "status",
+ "aspect": {
+ "json": {
+ "removed": false
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "browsePathsV2",
+ "aspect": {
+ "json": {
+ "path": [
+ {
+ "id": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e",
+ "urn": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e"
+ }
+ ]
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "dashboardInfo",
+ "aspect": {
+ "json": {
+ "customProperties": {
+ "chartCount": "0",
+ "workspaceName": "second-demo-workspace",
+ "workspaceId": "64ED5CAD-7C22-4684-8180-826122881108"
+ },
+ "title": "test_dashboard2",
+ "description": "",
+ "charts": [],
+ "datasets": [],
+ "lastModified": {
+ "created": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ },
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ },
+ "dashboardUrl": "https://localhost/dashboards/web/1"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "dashboardKey",
+ "aspect": {
+ "json": {
+ "dashboardTool": "powerbi",
+ "dashboardId": "powerbi.linkedin.com/dashboards/7D668CAD-8FFC-4505-9215-655BCA5BEBAE"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "container",
+ "aspect": {
+ "json": {
+ "container": "urn:li:container:33c7cab6ea0e58930cd6f943d0a4111e"
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
+ }
+},
+{
+ "entityType": "dashboard",
+ "entityUrn": "urn:li:dashboard:(powerbi,dashboards.7D668CAD-8FFC-4505-9215-655BCA5BEBAE)",
+ "changeType": "UPSERT",
+ "aspectName": "ownership",
+ "aspect": {
+ "json": {
+ "owners": [
+ {
+ "owner": "urn:li:corpuser:users.User3@foo.com",
+ "type": "NONE"
+ },
+ {
+ "owner": "urn:li:corpuser:users.User4@foo.com",
+ "type": "NONE"
+ }
+ ],
+ "lastModified": {
+ "time": 0,
+ "actor": "urn:li:corpuser:unknown"
+ }
+ }
+ },
+ "systemMetadata": {
+ "lastObserved": 1643871600000,
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json
index a4527b9715704..b301ca1c1b988 100644
--- a/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json
+++ b/metadata-ingestion/tests/integration/powerbi/golden_test_most_config_and_modified_since_admin_only.json
@@ -15,7 +15,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -30,7 +31,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -45,7 +47,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -62,7 +65,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -77,7 +81,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -94,7 +99,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -126,7 +132,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -147,7 +154,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -162,7 +170,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -180,7 +189,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -204,7 +214,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -219,7 +230,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -238,7 +250,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -249,6 +262,10 @@
"aspect": {
"json": {
"path": [
+ {
+ "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3",
+ "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3"
+ },
{
"id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
"urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
@@ -258,7 +275,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -275,7 +293,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -307,7 +326,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -328,7 +348,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -343,7 +364,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -361,7 +383,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -385,7 +408,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -400,7 +424,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -419,7 +444,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -430,6 +456,10 @@
"aspect": {
"json": {
"path": [
+ {
+ "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3",
+ "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3"
+ },
{
"id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
"urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
@@ -439,7 +469,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -456,7 +487,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -540,7 +572,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -561,7 +594,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -576,7 +610,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -594,7 +629,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -618,7 +654,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -633,7 +670,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -652,7 +690,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -663,6 +702,10 @@
"aspect": {
"json": {
"path": [
+ {
+ "id": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3",
+ "urn": "urn:li:container:e3dc21b5c79f9d594f639a9f57d7f2c3"
+ },
{
"id": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc",
"urn": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
@@ -672,7 +715,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -704,7 +748,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -719,7 +764,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -735,7 +781,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -752,7 +799,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -767,7 +815,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -787,7 +836,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -815,6 +865,9 @@
}
},
"inputs": [
+ {
+ "string": "urn:li:container:977b804137a1d2bf897ff1bbf440a1cc"
+ },
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,hr_pbi_test.dbo_book_issue,DEV)"
},
@@ -829,7 +882,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -844,7 +898,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -860,7 +915,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -877,7 +933,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -892,7 +949,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -912,7 +970,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -929,7 +988,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -966,7 +1026,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -981,7 +1042,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -997,7 +1059,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1012,7 +1075,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1032,7 +1096,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1049,7 +1114,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1079,7 +1145,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1094,7 +1161,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1110,7 +1178,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1127,7 +1196,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1151,7 +1221,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1166,7 +1237,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1186,7 +1258,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1205,7 +1278,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1220,7 +1294,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1235,7 +1310,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1252,7 +1328,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1267,7 +1344,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1287,7 +1365,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
},
{
@@ -1302,7 +1381,8 @@
},
"systemMetadata": {
"lastObserved": 1643871600000,
- "runId": "powerbi-test"
+ "runId": "powerbi-test",
+ "lastRunId": "no-run-id-provided"
}
}
]
\ No newline at end of file
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index 7232d2a38da1d..c9b0ded433749 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -1039,7 +1039,11 @@ def test_workspace_container(
"type": "powerbi",
"config": {
**default_source_config(),
+ "workspace_id_pattern": {
+ "deny": ["64ED5CAD-7322-4684-8180-826122881108"],
+ },
"extract_workspaces_to_containers": True,
+ "extract_datasets_to_containers": True,
"extract_reports": True,
},
},
From bfa1769d4dd4f5281d751c6998c586e4e021897d Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Thu, 9 Nov 2023 17:56:33 -0800
Subject: [PATCH 085/792] refactor(schema tab): Remove last observed timestamps
from schema tab (#9188)
---
.../schema/SchemaTimeStamps.test.tsx | 23 -------
.../schema/components/SchemaHeader.tsx | 6 --
.../schema/components/SchemaTimeStamps.tsx | 64 -------------------
.../shared/tabs/Dataset/Schema/SchemaTab.tsx | 5 --
4 files changed, 98 deletions(-)
delete mode 100644 datahub-web-react/src/app/entity/dataset/profile/__tests__/schema/SchemaTimeStamps.test.tsx
delete mode 100644 datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaTimeStamps.tsx
diff --git a/datahub-web-react/src/app/entity/dataset/profile/__tests__/schema/SchemaTimeStamps.test.tsx b/datahub-web-react/src/app/entity/dataset/profile/__tests__/schema/SchemaTimeStamps.test.tsx
deleted file mode 100644
index c8bb5d8100f2a..0000000000000
--- a/datahub-web-react/src/app/entity/dataset/profile/__tests__/schema/SchemaTimeStamps.test.tsx
+++ /dev/null
@@ -1,23 +0,0 @@
-import { render } from '@testing-library/react';
-import React from 'react';
-import { toRelativeTimeString } from '../../../../../shared/time/timeUtils';
-import SchemaTimeStamps from '../../schema/components/SchemaTimeStamps';
-
-describe('SchemaTimeStamps', () => {
- it('should render last observed text if lastObserved is not null', () => {
- const { getByText, queryByText } = render( );
- expect(getByText(`Last observed ${toRelativeTimeString(123)}`)).toBeInTheDocument();
- expect(queryByText(`Reported ${toRelativeTimeString(123)}`)).toBeNull();
- });
-
- it('should render last updated text if lastObserved is null', () => {
- const { getByText, queryByText } = render( );
- expect(queryByText(`Last observed ${toRelativeTimeString(123)}`)).toBeNull();
- expect(getByText(`Reported ${toRelativeTimeString(123)}`)).toBeInTheDocument();
- });
-
- it('should return null if lastUpdated and lastObserved are both null', () => {
- const { container } = render( );
- expect(container.firstChild).toBeNull();
- });
-});
diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaHeader.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaHeader.tsx
index 9e9e0ede2a1ce..2fc8fc11cd1b2 100644
--- a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaHeader.tsx
+++ b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaHeader.tsx
@@ -17,7 +17,6 @@ import { SemanticVersionStruct } from '../../../../../../types.generated';
import { toRelativeTimeString } from '../../../../../shared/time/timeUtils';
import { ANTD_GRAY, REDESIGN_COLORS } from '../../../../shared/constants';
import { navigateToVersionedDatasetUrl } from '../../../../shared/tabs/Dataset/Schema/utils/navigateToVersionedDatasetUrl';
-import SchemaTimeStamps from './SchemaTimeStamps';
import getSchemaFilterFromQueryString from '../../../../shared/tabs/Dataset/Schema/utils/getSchemaFilterFromQueryString';
const SchemaHeaderContainer = styled.div`
@@ -137,8 +136,6 @@ type Props = {
hasKeySchema: boolean;
showKeySchema: boolean;
setShowKeySchema: (show: boolean) => void;
- lastUpdated?: number | null;
- lastObserved?: number | null;
selectedVersion: string;
versionList: Array;
showSchemaAuditView: boolean;
@@ -158,8 +155,6 @@ export default function SchemaHeader({
hasKeySchema,
showKeySchema,
setShowKeySchema,
- lastUpdated,
- lastObserved,
selectedVersion,
versionList,
showSchemaAuditView,
@@ -255,7 +250,6 @@ export default function SchemaHeader({
)}
-
- {lastObserved && (
- Last observed on {toLocalDateTimeString(lastObserved)}.
- )}
- {lastUpdated && First reported on {toLocalDateTimeString(lastUpdated)}.
}
- >
- }
- >
-
- {lastObserved && (
-
- Last observed {toRelativeTimeString(lastObserved)}
-
- )}
- {!lastObserved && lastUpdated && (
-
-
- Reported {toRelativeTimeString(lastUpdated)}
-
- )}
-
-
- );
-}
-
-export default SchemaTimeStamps;
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx
index 4bdb2dac033e7..75027e17b6d0c 100644
--- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx
@@ -151,9 +151,6 @@ export const SchemaTab = ({ properties }: { properties?: any }) => {
return groupByFieldPath(filteredRows, { showKeySchema });
}, [showKeySchema, filteredRows]);
- const lastUpdated = getSchemaBlameData?.getSchemaBlame?.version?.semanticVersionTimestamp;
- const lastObserved = versionedDatasetData.data?.versionedDataset?.schema?.lastObserved;
-
const schemaFieldBlameList: Array =
(getSchemaBlameData?.getSchemaBlame?.schemaFieldBlameList as Array) || [];
@@ -167,8 +164,6 @@ export const SchemaTab = ({ properties }: { properties?: any }) => {
hasKeySchema={hasKeySchema}
showKeySchema={showKeySchema}
setShowKeySchema={setShowKeySchema}
- lastObserved={lastObserved}
- lastUpdated={lastUpdated}
selectedVersion={selectedVersion}
versionList={versionList}
showSchemaAuditView={showSchemaAuditView}
From 9c0f4de38241477524682943c815d5c03259e1a5 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Fri, 10 Nov 2023 16:06:06 +0900
Subject: [PATCH 086/792] docs: adjust sidebar & create new admin section
(#9064)
---
docs-website/sidebars.js | 213 ++++++++++++++++++++------------------
docs/CODE_OF_CONDUCT.md | 2 +-
docs/saas.md | 14 ---
docs/townhall-history.md | 216 +++++++++++++++++++++++----------------
docs/townhalls.md | 11 +-
5 files changed, 253 insertions(+), 203 deletions(-)
delete mode 100644 docs/saas.md
diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index 4d2420256ebff..f15f2927379c5 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -9,17 +9,13 @@ module.exports = {
overviewSidebar: [
{
- label: "Getting Started",
+ label: "What Is DataHub?",
type: "category",
collapsed: true,
+ link: { type: "doc", id: "docs/features" },
items: [
// By the end of this section, readers should understand the core use cases that DataHub addresses,
// target end-users, high-level architecture, & hosting options
- {
- type: "doc",
- label: "Introduction",
- id: "docs/features",
- },
{
type: "doc",
label: "Quickstart",
@@ -31,7 +27,6 @@ module.exports = {
href: "https://demo.datahubproject.io/",
},
"docs/what-is-datahub/datahub-concepts",
- "docs/saas",
],
},
{
@@ -161,7 +156,15 @@ module.exports = {
"docs/deploy/azure",
"docker/README",
"docs/deploy/kubernetes",
+ "docs/deploy/confluent-cloud",
"docs/deploy/environment-vars",
+ "docs/how/extract-container-logs",
+ ],
+ },
+ {
+ type: "category",
+ label: "Admin",
+ items: [
{
Authentication: [
"docs/authentication/README",
@@ -195,20 +198,91 @@ module.exports = {
"docs/how/restore-indices",
"docs/advanced/db-retention",
"docs/advanced/monitoring",
- "docs/how/extract-container-logs",
"docs/deploy/telemetry",
"docs/how/kafka-config",
- "docs/deploy/confluent-cloud",
"docs/advanced/no-code-upgrade",
"docs/how/jattach-guide",
],
},
- "docs/how/updating-datahub",
],
},
{
- API: [
- "docs/api/datahub-apis",
+ Developers: [
+ // The purpose of this section is to provide developers & technical users with
+ // concrete tutorials for how to work with the DataHub CLI & APIs
+ {
+ Architecture: [
+ "docs/architecture/architecture",
+ "docs/components",
+ "docs/architecture/metadata-ingestion",
+ "docs/architecture/metadata-serving",
+ "docs/architecture/docker-containers",
+ ],
+ },
+ {
+ "Metadata Model": [
+ "docs/modeling/metadata-model",
+ "docs/modeling/extending-the-metadata-model",
+ "docs/what/mxe",
+ {
+ Entities: [
+ {
+ type: "autogenerated",
+ dirName: "docs/generated/metamodel/entities", // '.' means the current docs folder
+ },
+ ],
+ },
+ ],
+ },
+ {
+ "Developing on DataHub": [
+ "docs/developers",
+ "docs/docker/development",
+ "metadata-ingestion/developing",
+ "docs/api/graphql/graphql-endpoint-development",
+ {
+ Modules: [
+ "datahub-web-react/README",
+ "datahub-frontend/README",
+ "datahub-graphql-core/README",
+ "metadata-service/README",
+ "metadata-jobs/mae-consumer-job/README",
+ "metadata-jobs/mce-consumer-job/README",
+ ],
+ },
+ ],
+ },
+ "docs/plugins",
+ {
+ Troubleshooting: [
+ "docs/troubleshooting/quickstart",
+ "docs/troubleshooting/build",
+ "docs/troubleshooting/general",
+ ],
+ },
+ {
+ Advanced: [
+ "metadata-ingestion/docs/dev_guides/reporting_telemetry",
+ "docs/advanced/mcp-mcl",
+ "docker/datahub-upgrade/README",
+ "docs/advanced/no-code-modeling",
+ "datahub-web-react/src/app/analytics/README",
+ "docs/how/migrating-graph-service-implementation",
+ "docs/advanced/field-path-spec-v2",
+ "metadata-ingestion/adding-source",
+ "docs/how/add-custom-ingestion-source",
+ "docs/how/add-custom-data-platform",
+ "docs/advanced/browse-paths-upgrade",
+ "docs/browseV2/browse-paths-v2",
+ ],
+ },
+ ],
+ },
+ {
+ type: "category",
+ label: "API",
+ link: { type: "doc", id: "docs/api/datahub-apis" },
+ items: [
{
"GraphQL API": [
{
@@ -466,92 +540,14 @@ module.exports = {
],
},
{
- Develop: [
- // The purpose of this section is to provide developers & technical users with
- // concrete tutorials for how to work with the DataHub CLI & APIs
- {
- "DataHub Metadata Model": [
- "docs/modeling/metadata-model",
- "docs/modeling/extending-the-metadata-model",
- "docs/what/mxe",
- {
- Entities: [
- {
- type: "autogenerated",
- dirName: "docs/generated/metamodel/entities", // '.' means the current docs folder
- },
- ],
- },
- ],
- },
- {
- Architecture: [
- "docs/architecture/architecture",
- "docs/components",
- "docs/architecture/metadata-ingestion",
- "docs/architecture/metadata-serving",
- "docs/architecture/docker-containers",
- ],
- },
- {
- "Developing on DataHub": [
- "docs/developers",
- "docs/docker/development",
- "metadata-ingestion/developing",
- "docs/api/graphql/graphql-endpoint-development",
- {
- Modules: [
- "datahub-web-react/README",
- "datahub-frontend/README",
- "datahub-graphql-core/README",
- "metadata-service/README",
- "metadata-jobs/mae-consumer-job/README",
- "metadata-jobs/mce-consumer-job/README",
- ],
- },
- ],
- },
- "docs/plugins",
- {
- Troubleshooting: [
- "docs/troubleshooting/quickstart",
- "docs/troubleshooting/build",
- "docs/troubleshooting/general",
- ],
- },
- {
- Advanced: [
- "metadata-ingestion/docs/dev_guides/reporting_telemetry",
- "docs/advanced/mcp-mcl",
- "docker/datahub-upgrade/README",
- "docs/advanced/no-code-modeling",
- "datahub-web-react/src/app/analytics/README",
- "docs/how/migrating-graph-service-implementation",
- "docs/advanced/field-path-spec-v2",
- "metadata-ingestion/adding-source",
- "docs/how/add-custom-ingestion-source",
- "docs/how/add-custom-data-platform",
- "docs/advanced/browse-paths-upgrade",
- "docs/browseV2/browse-paths-v2",
- ],
- },
- ],
- },
- {
- Community: [
- "docs/slack",
- "docs/townhalls",
- "docs/townhall-history",
- "docs/CODE_OF_CONDUCT",
- "docs/CONTRIBUTING",
- "docs/links",
- "docs/rfc",
- "SECURITY",
- ],
- },
- {
- "Managed DataHub": [
- "docs/managed-datahub/managed-datahub-overview",
+ label: "Managed DataHub",
+ type: "category",
+ collapsed: true,
+ link: {
+ type: "doc",
+ id: "docs/managed-datahub/managed-datahub-overview",
+ },
+ items: [
"docs/managed-datahub/welcome-acryl",
{
type: "doc",
@@ -648,7 +644,26 @@ module.exports = {
],
},
{
- "Release History": ["releases"],
+ label: "Community",
+ type: "category",
+ collapsed: true,
+ link: {
+ type: "generated-index",
+ title: "Community",
+ description: "Learn about DataHub community.",
+ },
+ items: [
+ "docs/slack",
+ "docs/townhalls",
+ // "docs/townhall-history",
+ "docs/CODE_OF_CONDUCT",
+ "docs/CONTRIBUTING",
+ "docs/links",
+ "docs/rfc",
+ ],
+ },
+ {
+ "Release History": ["releases", "docs/how/updating-datahub"],
},
// "Candidates for Deprecation": [
diff --git a/docs/CODE_OF_CONDUCT.md b/docs/CODE_OF_CONDUCT.md
index 1c4fd659f14e0..ca899dc26d5f7 100644
--- a/docs/CODE_OF_CONDUCT.md
+++ b/docs/CODE_OF_CONDUCT.md
@@ -1,4 +1,4 @@
-# Contributor Covenant Code of Conduct
+# Code of Conduct
## Our Pledge
diff --git a/docs/saas.md b/docs/saas.md
deleted file mode 100644
index de57b5617e062..0000000000000
--- a/docs/saas.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# DataHub SaaS
-
-Sign up for fully managed, hassle-free and secure SaaS service for DataHub, provided by [Acryl Data](https://www.acryl.io/).
-
-
-
- Sign up
-
-
-
-Refer to [Managed Datahub Exclusives](/docs/managed-datahub/managed-datahub-overview.md) for more information.
diff --git a/docs/townhall-history.md b/docs/townhall-history.md
index d92905af0cd72..0242e4ec2cee1 100644
--- a/docs/townhall-history.md
+++ b/docs/townhall-history.md
@@ -1,22 +1,55 @@
-# Town Hall History
+# Town Hall History
-A list of previous Town Halls, their planned schedule, and the recording of the meeting.
+:::note
+For the Town Hall meetings after June 2023, please refer to our [LinkedIn Live event history](https://www.linkedin.com/company/acryl-data/events/).
+:::
-## 03/23/2023
-[Full YouTube video](https://youtu.be/BTX8rIBe0yo)
+### Jun 2023
+[Full YouTube video](https://www.youtube.com/watch?v=1QVcUmRQK5E)
+
+- Community & Project Updates - Maggie Hays & Shirshanka Das (Acryl Data)
+- Community Case Study: Dataset Joins - Raj Tekal & Bobbie-Jean Nowak (Optum)
+- DataHub 201: Column-Level Lineage - Hyejin Yoon (Acryl Data)
+- Sneak Peek: BigQuery Column-Level Lineage with SQL Parsing - Harshal Sheth (Acryl Data)
+- DataHub Performance Tuning – Indy Prentice (Acryl Data)
+
+
+### May 2023
+[Full YouTube video](https://www.youtube.com/watch?v=KHNPjSbbZR8)
+
+**Agenda**
+- Community - Maggie Hays & Shirshanka Das (Acryl Data)
+- Community Case Study: Jira + DataHub for Access Requests - Joshua Garza (Sharp Healthcare)
+- Sneak Peek: Use your own ownership types - Pedro Silva (Acryl Data)
+- Sneak Peek: Data Contracts are coming! – John Joyce, Shirshanka (Acryl Data)
+- Bring DataHub into your BI Tools — Chris Collins (Acryl Data)
+
+### Apr 2023
+[Full YouTube video](https://www.youtube.com/watch?v=D5YYGu-ZIBo)
+
+**Agenda**
+- Community & Roadmap Updates - Maggie Hays & Shirshanka Das (Acryl Data)
+- DataHub 201: Python SDK - Hyejin Yoon (Acryl Data)
+- Streamlined Search & Browse Experience - Chris Collins (Acryl Data)
+- Acryl's DataHub GitHub Actions - Harshal Sheth (Acryl Data)
+- Data Products in DataHub - Shirshanka Das & Chris Collins (Acryl Data)
+- DataHub Docs Bot - Maggie Hays (Acryl Data)
+
+### Mar 2023
-### Agenda
+[Full YouTube video](https://youtu.be/BTX8rIBe0yo)
+**Agenda**
- Community & Roadmap Update
- Recent Releases
- Community Case Study — Jumio’s DataHub adoption journey
- DataHub 201: Data Debugging
- Sneak Peek: Streamlined Filtering Experience
-## 02/23/2023
+### Feb 2023
[Full YouTube video](https://youtu.be/UItt4ppJSFc)
-### Agenda
+**Agenda**
- Community & Roadmap Update
- Recent Releases
@@ -27,20 +60,20 @@ A list of previous Town Halls, their planned schedule, and the recording of the
- Simplifying Metadata Ingestion
- DataHub 201: Rolling Out DataHub
-## 01/26/2023
+### Jan 2023 (26th)
[Full YouTube video](https://youtu.be/A3mSiGHZ6Rc)
-### Agenda
+**Agenda**
- What’s to Come - Q1 2023 Roadmap: Data Products, Data Contracts and more
- Community Case Study - Notion: Automating annotations and metadata propagation
- Community Contribution - Grab: Improvements to documentation editing
- Simplifying DataHub - Removing Schema Registry requirement and introducing DataHub Lite
-## 01/05/2023
+### Jan 2023 (5th)
[Full YouTube video](https://youtu.be/ECxIMbKwuOY)
-### Agenda
+**Agenda**
- DataHub Community: 2022 in Review - Our Community of Data Practitioners is one of a kind. We’ll take the time to celebrate who we are, what we’ve built, and how we’ve collaborated in the past 12 months.
- Search Improvements - Learn how we’re making the Search experience smarter and faster to connect you with the most relevant resources during data discovery.
@@ -49,13 +82,12 @@ A list of previous Town Halls, their planned schedule, and the recording of the
- Sneak Peek: Time-based Lineage - Get a preview of how you’ll soon be able to trace lineage between datasets across different points in time to understand how interdependencies have evolved.
- Sneak Peek: Chrome Extension - Soon, you’ll be able to quickly access rich metadata from DataHub while exploring resources in Looker via our upcoming Chrome Extension.
-## 12/01/2022
+### Dec 2022
[Full YouTube video](https://youtu.be/BlCLhG8lGoY)
-### Agenda
+**Agenda**
November Town Hall (in December!)
-
- Community Case Study - The Pinterest Team will share how they have integrated DataHub + Thrift and extended the Metadata Model with a Data Element entity to capture semantic types.
- NEW! Ingestion Quickstart Guides - DataHub newbies, this one is for you! We’re rolling out ingestion quickstart guides to help you quickly get up and running with DataHub + Snowflake, BigQuery, and more!
- NEW! In-App Product Tours - We’re making it easier than ever for end-users to get familiar with all that DataHub has to offer - hear all about the in-product onboarding resources we’re rolling out soon!
@@ -64,10 +96,10 @@ November Town Hall (in December!)
- NEW! Slack + Microsoft Teams Integrations - Send automated alerts to Slack and/or Teams to keep track of critical events and changes within DataHub.
- Hacktoberfest Winners Announced - We’ll recap this year’s Hacktoberfest and announce three winners of a $250 Amazon gift card & DataHub Swag.
-## 10/27/2022
+### Oct 2022
[Full YouTube video](https://youtu.be/B74WHxX5EMk)
-### Agenda
+**Agenda**
- Conquer Data Governance with Acryl Data’s Metadata Tests - Learn how to tackle Data Governance with incremental, automation-driven governance using Metadata Tests provided in Acryl Data’s managed DataHub offering
- Community Case Study - The Grab Team shares how they are using DataHub for data discoverability, automated classification and governance workflows, data quality observability, and beyond!
@@ -75,20 +107,19 @@ November Town Hall (in December!)
- Sneak Peek! Saved Views - Learn how you can soon use Saved Views to help end-users navigate entities in DataHub with more precision and focus
- Performance Improvements - Hear about the latest upgrades to DataHub performance
-## 9/29/2022
+### Sep 2022
[Full YouTube video](https://youtu.be/FjkNySWkghY)
-### Agenda
-
+**Agenda**
- Column Level Lineage is here! - Demo of column-level lineage and impact analysis in the DataHub UI
- Community Case Study - The Stripe Team shares how they leverage DataHub to power observability within their Airflow-based ecosystem
- Sneak Peek! Automated PII Classification - Preview upcoming functionality to automatically identify data fields that likely contain sensitive data
- Ingestion Improvements Galore - Improved performance and functionality for dbt, Looker, Tableau, and Presto ingestion sources
-## 8/25/2022
+### Aug 2022
[Full YouTube video](https://youtu.be/EJCKxKBvCwo)
-### Agenda
+**Agenda**
- Community Case Study - The Etsy Team shares their journey of adopting DataHub
- Looker & DataHub Improvements - surface the most relevant Looks and Dashboards
@@ -97,10 +128,11 @@ November Town Hall (in December!)
- Patch Support - Native support for PATCH in the metadata protocol to support efficient updates to add & remove owners, lineage, tags and more
- Sneak Peek! Advanced Search
-## 7/28/2022
+### Jul 2022
+
[Full YouTube video](https://youtu.be/Zrkf3Mzcvc4)
-### Agenda
+**Agenda**
- Community Updates
- Project Updates
@@ -109,21 +141,20 @@ November Town Hall (in December!)
- Streamlined Metadata Ingestion
- DataHub 201: Metadata Enrichment
-## 6/30/2022
+### Jun 2022
[Full YouTube video](https://youtu.be/fAD53fEJ6m0)
-### Agenda
-
+**Agenda**
- Community Updates
- Project Updates
- dbt Integration Updates
- CSV Ingestion Support
- DataHub 201 - Glossary Term Deep Dive
-## 5/26/2022
+### May 2022
[Full YouTube video](https://youtu.be/taKb_zyowEE)
-### Agenda
+**Agenda**
- Community Case Study: Hear how the G-Research team is using Cassandra as DataHub’s Backend
- Creating & Editing Glossary Terms from the DataHub UI
@@ -132,20 +163,22 @@ November Town Hall (in December!)
- Sneak Peek: Data Reliability with DataHub
- Metadata Day Hackathon Winners
-## 4/28/2022
+### Apr 2022
[Full YouTube video](https://www.youtube.com/watch?v=7iwNxHgqxtg)
-### Agenda
+**Agenda**
+
- Community Case Study: Hear from Included Health about how they are embedding external tools into the DataHub UI
- New! Actions Framework: run custom code when changes happen within DataHub
- UI Refresh for ML Entities
- Improved deletion support for time-series aspects, tags, terms, & more
- OpenAPI Improvements
-## 3/31/2022
+### Mar 2022
[Full YouTube video](https://www.youtube.com/watch?v=IVazVgcNRdw)
-### Agenda
+**Agenda**
+
- Community Case Study: Hear from Zendesk about how they are applying “shift left” principles by authoring metadata in their Protobuf schemas
- RBAC Functionality: View-Based Policies
- Schema Version History - surfacing the history of schema changes in DataHub's UI
@@ -154,20 +187,22 @@ November Town Hall (in December!)
- Delete API
-## 2/25/2022
+### Feb 2022
[Full YouTube video](https://www.youtube.com/watch?v=enBqB2Dbuv4)
-### Agenda
+**Agenda**
+
- Lineage Impact Analysis - using DataHub to understand the impact of changes on downstream dependencies
- Displaying Data Quality Checks in the UI
- Roadmap update: Schema Version History & Column-Level Lineage
- Community Case Study: Managing Lineage via YAML
-## 1/28/2022
+### Jan 2022
[Full YouTube video](https://youtu.be/ShlSR3dMUnE)
-### Agenda
+**Agenda**
+
- Community & Roadmap Updates by Maggie Hays (Acryl Data)
- Project Updates by Shirshanka Das (Acryl Data)
@@ -176,10 +211,11 @@ November Town Hall (in December!)
- DataHub Basics — Data Profiling & Usage Stats 101 by Maggie Hays & Tamás Németh (Acryl Data)
- Demo: Spark Lineage by Mugdha Hardikar (GS Lab) & Shirshanka Das
-## 12/17/2021
+### Dec 2021
[Full YouTube video](https://youtu.be/rYInKCwxu7o)
-### Agenda
+**Agenda**
+
- Community & Roadmap Updates by Maggie Hays (Acryl Data)
- Project Updates by Shirshanka Das (Acryl Data)
- 2021 DataHub Community in Review by Maggie Hays
@@ -189,10 +225,11 @@ November Town Hall (in December!)
- Top DataHub Contributors of 2021 - Maggie Hays
- Final Surprise! We Interviewed a 10yo and a 70yo about DataHub
-## 11/19/2021
+### Nov 2021
[Full YouTube video](https://youtu.be/to80sEDZz7k)
-### Agenda
+**Agenda**
+
- Community & Roadmap Updates by Maggie Hays (Acryl Data)
- Project Updates by Shirshanka Das (Acryl Data)
- DataHub Basics -- Lineage 101 by John Joyce & Surya Lanka (Acryl Data)
@@ -200,10 +237,11 @@ November Town Hall (in December!)
- DataHub API Authentication by John Joyce (Acryl Data)
- Case Study: LinkedIn pilot to extend the OSS UI by Aikepaer Abuduweili & Joshua Shinavier
-## 10/29/2021
+### Oct 2021
[Full YouTube video](https://youtu.be/GrS_uZhYNm0)
-### Agenda
+**Agenda**
+
- DataHub Community & Roadmap Update - Maggie Hays (Acryl Data)
- October Project Updates - Shirshanka Das (Acryl Data)
- Introducing Recommendations - John Joyce & Dexter Lee (Acryl Data)
@@ -211,10 +249,11 @@ November Town Hall (in December!)
- Data Profiling Improvements - Surya Lanka & Harshal Sheth (Acryl Data)
- Lineage Improvements & BigQuery Dataset Lineage by Gabe Lyons & Varun Bharill (Acryl Data)
-## 9/24/2021
+### Sep 2021
[Full YouTube video](https://youtu.be/nQDiKPKnLLQ)
-### Agenda
+**Agenda**
+
- Project Updates and Callouts by Shirshanka
- GraphQL Public API Annoucement
- Demo: Faceted Search by Gabe Lyons (Acryl Data)
@@ -224,10 +263,11 @@ November Town Hall (in December!)
- Offline
- Foreign Key and Related Term Mapping by Gabe Lyons (Acryl Data) [video](https://www.loom.com/share/79f27c2d9f6c4a3b8aacbc48c19add18)
-## 8/27/2021
+### Aug 2021
[Full YouTube video](https://youtu.be/3joZINi3ti4)
-### Agenda
+**Agenda**
+
- Project Updates and Callouts by Shirshanka
- Business Glossary Demo
- 0.8.12 Upcoming Release Highlights
@@ -239,12 +279,13 @@ November Town Hall (in December!)
- Performance Monitoring by Dexter Lee (Acryl Data) [video](https://youtu.be/6Xfr_Y9abZo)
-## 7/23/2021
+### Jul 2021
[Full YouTube video](https://www.youtube.com/watch?v=rZsiB8z5rG4)
[Medium Post](https://medium.com/datahub-project/datahub-project-updates-f4299cd3602e?source=friends_link&sk=27af7637f7ae44786ede694c3af512a5)
-### Agenda
+**Agenda**
+
- Project Updates by Shirshanka
- Release highlights
@@ -253,12 +294,13 @@ November Town Hall (in December!)
- Demo: AWS SageMaker integration for Models and Features by Kevin Hu (Acryl Data)
-## 6/25/2021
+### Jun 2021
[Full YouTube video](https://www.youtube.com/watch?v=xUHOdDfdFpY)
[Medium Post](https://medium.com/datahub-project/datahub-project-updates-ed3155476408?source=friends_link&sk=02816a16ff2acd688e6db8eb55808d31)
-#### Agenda
+**Agenda**
+
- Project Updates by Shirshanka
- Release notes
@@ -269,12 +311,13 @@ November Town Hall (in December!)
- Developer Session: Simplified Deployment for DataHub by John Joyce, Gabe Lyons (Acryl Data)
-## 5/27/2021
+### May 2021
[Full YouTube video](https://www.youtube.com/watch?v=qgW_xpIr1Ho)
[Medium Post](https://medium.com/datahub-project/linkedin-datahub-project-updates-ed98cdf913c1?source=friends_link&sk=9930ec5579299b155ea87c747683d1ad)
-#### Agenda
+**Agenda**
+
- Project Updates by Shirshanka - 10 mins
- 0.8.0 Release
@@ -284,12 +327,13 @@ November Town Hall (in December!)
- Deep Dive: No Code Metadata Engine by John Joyce (Acryl Data) - 20 mins
- General Q&A and closing remarks
-## 4/23/2021
+### Apr 2021
[Full YouTube video](https://www.youtube.com/watch?v=dlFa4ubJ9ho)
[Medium Digest](https://medium.com/datahub-project/linkedin-datahub-project-updates-2b0d26066b8f?source=friends_link&sk=686c47219ed294e0838ae3e2fe29084d)
-#### Agenda
+**Agenda**
+
- Welcome - 5 mins
- Project Updates by Shirshanka - 10 mins
@@ -302,12 +346,13 @@ November Town Hall (in December!)
- General Q&A and closing remarks - 5 mins
-## 3/19/2021
+### Mar 2021
[YouTube video](https://www.youtube.com/watch?v=xE8Uc27VTG4)
-
+
[Medium Digest](https://medium.com/datahub-project/linkedin-datahub-project-updates-697f0faddd10?source=friends_link&sk=9888633c5c7219b875125e87a703ec4d)
-#### Agenda
+**Agenda**
+
* Welcome - 5 mins
* Project Updates ([slides](https://drive.google.com/file/d/1c3BTP3oDAzJr07l6pY6CkDZi5nT0cLRs/view?usp=sharing)) by [Shirshanka](https://www.linkedin.com/in/shirshankadas/) - 10 mins
@@ -320,11 +365,11 @@ November Town Hall (in December!)
* Closing remarks - 5 mins
-## 2/19/2021
+### Feb 2021
[YouTube video](https://www.youtube.com/watch?v=Z9ImbcsAVl0)
-
+
[Medium Digest](https://medium.com/datahub-project/linkedin-datahub-project-updates-february-2021-edition-338d2c6021f0)
-#### Agenda
+**Agenda**
* Welcome - 5 mins
* Latest React App Demo! ([video](https://www.youtube.com/watch?v=RQBEJhcen5E)) by John Joyce and Gabe Lyons - 5 mins
@@ -334,12 +379,12 @@ November Town Hall (in December!)
* Closing remarks - 5 mins
-## 1/15/2021
+### Jan 2021
[Full Recording](https://youtu.be/r862MZTLAJ0)
[Slide-deck](https://docs.google.com/presentation/d/e/2PACX-1vQ2B0iHb2uwege1wlkXHOgQer0myOMEE5EGnzRjyqw0xxS5SaAc8VMZ_1XVOHuTZCJYzZZW4i9YnzSN/pub?start=false&loop=false&delayms=3000)
-Agenda
+**Agenda**
- Announcements - 2 mins
- Community Updates ([video](https://youtu.be/r862MZTLAJ0?t=99)) - 10 mins
@@ -349,10 +394,10 @@ Agenda
- General Q&A from sign up sheet, slack, and participants - 15 mins
- Closing remarks - 5 minutes
-## 12/04/2020
+### Dec 2020
[Recording](https://linkedin.zoom.us/rec/share/8E7-lFnCi_kQ8OvXR9kW6fn-AjvV8VlqOO2xYR8b5Y_UeWI_ODcKFlxlHqYgBP7j.S-c8C1YMrz7d3Mjq)
-Agenda
+**Agenda**
- Quick intro - 5 mins
- [Why did Grofers choose DataHub for their data catalog?](https://github.com/acryldata/static-assets-test/raw/master/imgs/demo/Datahub_at_Grofers.pdf) by [Shubham Gupta](https://www.linkedin.com/in/shubhamg931/) - 15 minutes
@@ -360,11 +405,11 @@ Agenda
- General Q&A from sign up sheet, slack, and participants - 15 mins
- Closing remarks - 5 minutes
-## 11/06/2020
+### Nov 2020
[Recording](https://linkedin.zoom.us/rec/share/0yvjZ2fOzVmD8aaDo3lC59fXivmYG3EnF0U9tMVgKs827595usvSoIhtFUPjZCsU.b915nLRkw6iQlnoD)
-Agenda
+**Agenda**
- Quick intro - 5 mins
- [Lightning talk on Metadata use-cases at LinkedIn](https://github.com/acryldata/static-assets-test/raw/master/imgs/demo/Metadata_Use-Cases_at_LinkedIn_-_Lightning_Talk.pdf) by [Shirshanka Das](https://www.linkedin.com/in/shirshankadas/) (LinkedIn) - 5 mins
@@ -374,11 +419,11 @@ Agenda
- Closing remarks - 5 minutes
-## 09/25/2020
+### Sep 2020
[Recording](https://linkedin.zoom.us/rec/share/uEQ2pRY0BHbVqk_sOTVRm05VXJ0xM_zKJ26yzfCBqNZItiBht__k_juCCahJ37QK.IKAU9qA_0qdURX4_)
-Agenda
+**Agenda**
- Quick intro - 5 mins
- [Data Discoverability at SpotHero](https://github.com/acryldata/static-assets-test/raw/master/imgs/demo/Data_Discoverability_at_SpotHero.pdf) by [Maggie Hays](https://www.linkedin.com/in/maggie-hays/) (SpotHero) - 20 mins
@@ -386,23 +431,23 @@ Agenda
- General Q&A from sign up sheet, slack, and participants - 15 mins
- Closing remarks - 5 mins
-## 08/28/2020
+### Aug 2020
[Recording](https://linkedin.zoom.us/rec/share/vMBfcb31825IBZ3T71_wffM_GNv3T6a8hicf8_dcfzQlhfFxl5i_CPVKcmYaZA)
-Agenda
+**Agenda**
- Quick intro - 5 mins
- [Data Governance look for a Digital Bank](https://www.slideshare.net/SheetalPratik/linkedinsaxobankdataworkbench) by [Sheetal Pratik](https://www.linkedin.com/in/sheetalpratik/) (Saxo Bank) - 20 mins
- Column level lineage for datasets demo by [Nagarjuna Kanamarlapudi](https://www.linkedin.com/in/nagarjunak/) (LinkedIn) - 15 mins
- General Q&A from sign up sheet and participants - 15 mins
- Closing remarks - 5 mins
-## 07/31/20
+### Jul 2020
[Recording](https://bluejeans.com/s/wjnDRJevi5z/)
-Agenda
+**Agenda**
* Quick intro - 5 mins
* Showcasing new entities onboarded to internal LinkedIn DataHub (Data Concepts, Schemas) by [Nagarjuna Kanamarlapudi](https://www.linkedin.com/in/nagarjunak) (LinkedIn) - 15 mins
* Showcasing new Lineage UI in internal LinkedIn DataHub By [Ignacio Bona](https://www.linkedin.com/in/ignaciobona) (LinkedIn) - 10 mins
@@ -410,12 +455,12 @@ Agenda
* Answering questions from the signup sheet - 13 mins
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-
-## 06/26/20
+
+### Jun 2020
[Recording](https://bluejeans.com/s/yILyR/)
-Agenda
+**Agenda**
* Quick intro - 5 mins
* Onboarding Data Process entity by [Liangjun Jiang](https://github.com/liangjun-jiang) (Expedia) - 15 mins
* How to onboard a new relationship to metadata graph by [Kerem Sahin](https://github.com/keremsahin1) (Linkedin) - 15 mins
@@ -423,11 +468,11 @@ Agenda
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-## 05/29/20
+### May 2020
[Recording](https://bluejeans.com/s/GCAzY)
-Agenda
+**Agenda**
* Quick intro - 5 mins
* How to add a new aspect/feature for an existing entity in UI by [Charlie Tran](https://www.linkedin.com/in/charlie-tran/) (LinkedIn) - 10 mins
* How to search over a new field by [Jyoti Wadhwani](https://www.linkedin.com/in/jyotiwadhwani/) (LinkedIn) - 10 mins
@@ -435,11 +480,11 @@ Agenda
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-## 04/17/20
+### Apr 2020 (17th)
[Recording](https://bluejeans.com/s/eYRD4)
-Agenda
+**Agenda**
* Quick intro - 5 mins
* [DataHub Journey with Expedia Group](https://www.youtube.com/watch?v=ajcRdB22s5o&ab_channel=ArunVasudevan) by [Arun Vasudevan](https://www.linkedin.com/in/arun-vasudevan-55117368/) (Expedia) - 10 mins
* Deploying DataHub using Nix by [Larry Luo](https://github.com/clojurians-org) (Shanghai HuaRui Bank) - 10 mins
@@ -447,13 +492,13 @@ Agenda
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-## 04/03/20
+### Apr 2020 (3rd)
[Recording](https://bluejeans.com/s/vzYpa)
[Q&A](https://docs.google.com/document/d/1ChF9jiJWv9wj3HLLkFYRg7NSYg8Kb0PT7COd7Hf9Zpk/edit?usp=sharing)
-- Agenda
+**Agenda**
* Quick intro - 5 mins
* Creating Helm charts for deploying DataHub on Kubernetes by [Bharat Akkinepalli](https://www.linkedin.com/in/bharat-akkinepalli-ba0b7223/) (ThoughtWorks) - 10 mins
* How to onboard a new metadata aspect by [Mars Lan](https://www.linkedin.com/in/marslan) (LinkedIn) - 10 mins
@@ -461,13 +506,13 @@ Agenda
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-## 03/20/20
+### Mar 2020 (20th)
[Recording](https://bluejeans.com/s/FSKEF)
[Q&A](https://docs.google.com/document/d/1vQ6tAGXsVafnPIcZv1GSYgnTJJXFOACa1aWzOQjiGHI/edit)
-Agenda
+**Agenda**
* Quick intro - 5 mins
* Internal DataHub demo - 10 mins
* What's coming up next for DataHub (what roadmap items we are working on) - 10 mins
@@ -475,9 +520,8 @@ Agenda
* Questions from the participants - 10 mins
* Closing remarks - 5 mins
-## 03/06/20
+### Mar 2020 (6th)
[Recording](https://bluejeans.com/s/vULMG)
-[Q&A](https://docs.google.com/document/d/1N_VGqlH9CD-54LBsVlpcK2Cf2Mgmuzq79EvN9qgBqtQ/edit)
-
+[Q&A](https://docs.google.com/document/d/1N_VGqlH9CD-54LBsVlpcK2Cf2Mgmuzq79EvN9qgBqtQ/edit)
\ No newline at end of file
diff --git a/docs/townhalls.md b/docs/townhalls.md
index f9c3bb16150cd..c80d198e5184c 100644
--- a/docs/townhalls.md
+++ b/docs/townhalls.md
@@ -7,8 +7,13 @@ From time to time we also use the opportunity to showcase upcoming features.
## Meeting Invite & Agenda
-You can join with this link https://zoom.datahubproject.io, or [RSVP](https://rsvp.datahubproject.io/) to get a calendar invite - this will always have the most up-to-date agenda for upcoming sessions.
+You can join with [this link](https://zoom.datahubproject.io) or [RSVP](https://rsvp.datahubproject.io/) to get a calendar invite - this will always have the most up-to-date agenda for upcoming sessions.
+
+## Town Hall History
+
+See our Town Hall history for recordings and summaries of past town halls.
+
+* [Town Hall Events (July 2023 onward)](https://www.linkedin.com/company/acryl-data/events/)
+* [Town Hall Events (through June 2023)](townhall-history.md)
-## Past Meetings
-See [Town Hall History](townhall-history.md) for recordings of past town halls.
From 179f103412d036212a1155d436a507def4f4928f Mon Sep 17 00:00:00 2001
From: Xuelei Li <115022112+lix-mms@users.noreply.github.com>
Date: Fri, 10 Nov 2023 17:58:38 +0100
Subject: [PATCH 087/792] fix(metadata-io): in Neo4j service use proper
algorithm to get lineage (#8687)
Co-authored-by: RyanHolstien
Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
---
build.gradle | 8 +-
docker/neo4j/env/docker.env | 1 +
.../docker-compose-m1.quickstart.yml | 1 +
.../quickstart/docker-compose.quickstart.yml | 1 +
docs/how/updating-datahub.md | 1 +
metadata-io/build.gradle | 3 +
.../graph/neo4j/Neo4jGraphService.java | 231 +++++++++---------
.../graph/neo4j/Neo4jGraphServiceTest.java | 130 ++++++++++
.../graph/neo4j/Neo4jTestServerBuilder.java | 6 +-
9 files changed, 267 insertions(+), 115 deletions(-)
diff --git a/build.gradle b/build.gradle
index 31e005e001cf0..54802917d05a5 100644
--- a/build.gradle
+++ b/build.gradle
@@ -7,6 +7,8 @@ buildscript {
ext.springBootVersion = '2.7.14'
ext.openTelemetryVersion = '1.18.0'
ext.neo4jVersion = '4.4.9'
+ ext.neo4jTestVersion = '4.4.25'
+ ext.neo4jApocVersion = '4.4.0.20:all'
ext.testContainersVersion = '1.17.4'
ext.elasticsearchVersion = '2.9.0' // ES 7.10, Opensearch 1.x, 2.x
ext.jacksonVersion = '2.15.2'
@@ -154,8 +156,10 @@ project.ext.externalDependency = [
'mockServer': 'org.mock-server:mockserver-netty:5.11.2',
'mockServerClient': 'org.mock-server:mockserver-client-java:5.11.2',
'mysqlConnector': 'mysql:mysql-connector-java:8.0.20',
- 'neo4jHarness': 'org.neo4j.test:neo4j-harness:' + neo4jVersion,
+ 'neo4jHarness': 'org.neo4j.test:neo4j-harness:' + neo4jTestVersion,
'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jVersion,
+ 'neo4jTestJavaDriver': 'org.neo4j.driver:neo4j-java-driver:' + neo4jTestVersion,
+ 'neo4jApoc': 'org.neo4j.procedure:apoc:' + neo4jApocVersion,
'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:' + openTelemetryVersion,
'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:' + openTelemetryVersion,
'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15',
@@ -218,7 +222,7 @@ project.ext.externalDependency = [
'common': 'commons-io:commons-io:2.7',
'jline':'jline:jline:1.4.1',
'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0'
-
+
]
allprojects {
diff --git a/docker/neo4j/env/docker.env b/docker/neo4j/env/docker.env
index 961a5ffcf5483..c8f2a4878900f 100644
--- a/docker/neo4j/env/docker.env
+++ b/docker/neo4j/env/docker.env
@@ -1,3 +1,4 @@
NEO4J_AUTH=neo4j/datahub
NEO4J_dbms_default__database=graph.db
NEO4J_dbms_allow__upgrade=true
+NEO4JLABS_PLUGINS="[\"apoc\"]"
diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml
index 613718306abef..4df32395cf82d 100644
--- a/docker/quickstart/docker-compose-m1.quickstart.yml
+++ b/docker/quickstart/docker-compose-m1.quickstart.yml
@@ -253,6 +253,7 @@ services:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
+ - NEO4JLABS_PLUGINS=["apoc"]
healthcheck:
interval: 1s
retries: 5
diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml
index 30ccbae59be74..29c980532d46f 100644
--- a/docker/quickstart/docker-compose.quickstart.yml
+++ b/docker/quickstart/docker-compose.quickstart.yml
@@ -253,6 +253,7 @@ services:
- NEO4J_AUTH=neo4j/datahub
- NEO4J_dbms_default__database=graph.db
- NEO4J_dbms_allow__upgrade=true
+ - NEO4JLABS_PLUGINS=["apoc"]
healthcheck:
interval: 1s
retries: 5
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 28f11e4b6d707..90b53161950e8 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -16,6 +16,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
### Breaking Changes
+- #8687 (datahub-helm #365 #353) - If Helm is used for installation and Neo4j is enabled, update the prerequisites Helm chart to version >=0.1.2 and adjust your value overrides in the `neo4j:` section according to the new structure.
- #9044 - GraphQL APIs for adding ownership now expect either an `ownershipTypeUrn` referencing a customer ownership type or a (deprecated) `type`. Where before adding an ownership without a concrete type was allowed, this is no longer the case. For simplicity you can use the `type` parameter which will get translated to a custom ownership type internally if one exists for the type being added.
- #9010 - In Redshift source's config `incremental_lineage` is set default to off.
- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.
diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle
index 740fed61f13d5..4b36f533476f7 100644
--- a/metadata-io/build.gradle
+++ b/metadata-io/build.gradle
@@ -57,6 +57,9 @@ dependencies {
testImplementation externalDependency.h2
testImplementation externalDependency.mysqlConnector
testImplementation externalDependency.neo4jHarness
+ testImplementation (externalDependency.neo4jApoc) {
+ exclude group: 'org.yaml', module: 'snakeyaml'
+ }
testImplementation externalDependency.mockito
testImplementation externalDependency.mockitoInline
testImplementation externalDependency.iStackCommons
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java
index 41d39cca4edda..ac57fb7db2b78 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java
@@ -5,6 +5,7 @@
import com.datahub.util.exception.RetryLimitReached;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
import com.linkedin.common.UrnArray;
import com.linkedin.common.UrnArrayArray;
import com.linkedin.common.urn.Urn;
@@ -25,17 +26,20 @@
import com.linkedin.metadata.query.filter.RelationshipDirection;
import com.linkedin.metadata.query.filter.RelationshipFilter;
import com.linkedin.metadata.utils.metrics.MetricUtils;
+import com.linkedin.util.Pair;
import io.opentelemetry.extension.annotations.WithSpan;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.StringJoiner;
-import java.util.function.Function;
import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.AllArgsConstructor;
@@ -50,8 +54,7 @@
import org.neo4j.driver.Session;
import org.neo4j.driver.SessionConfig;
import org.neo4j.driver.exceptions.Neo4jException;
-import org.neo4j.driver.internal.InternalRelationship;
-import org.neo4j.driver.types.Node;
+import org.neo4j.driver.types.Relationship;
@Slf4j
@@ -62,9 +65,6 @@ public class Neo4jGraphService implements GraphService {
private final Driver _driver;
private SessionConfig _sessionConfig;
- private static final String SOURCE = "source";
- private static final String UI = "UI";
-
public Neo4jGraphService(@Nonnull LineageRegistry lineageRegistry, @Nonnull Driver driver) {
this(lineageRegistry, driver, SessionConfig.defaultConfig());
}
@@ -234,53 +234,36 @@ public EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageDi
@Nullable Long endTimeMillis) {
log.debug(String.format("Neo4j getLineage maxHops = %d", maxHops));
- final String statement =
- generateLineageStatement(entityUrn, direction, graphFilters, maxHops, startTimeMillis, endTimeMillis);
+ final var statementAndParams =
+ generateLineageStatementAndParameters(entityUrn, direction, graphFilters, maxHops, startTimeMillis, endTimeMillis);
+
+ final var statement = statementAndParams.getFirst();
+ final var parameters = statementAndParams.getSecond();
List neo4jResult =
- statement != null ? runQuery(buildStatement(statement, new HashMap<>())).list() : new ArrayList<>();
-
- // It is possible to have more than 1 path from node A to node B in the graph and previous query returns all the paths.
- // We convert the List into Map with only the shortest paths. "item.get(i).size()" is the path size between two nodes in relation.
- // The key for mapping is the destination node as the source node is always the same, and it is defined by parameter.
- neo4jResult = neo4jResult.stream()
- .collect(Collectors.toMap(item -> item.values().get(2).asNode().get("urn").asString(), Function.identity(),
- (item1, item2) -> item1.get(1).size() < item2.get(1).size() ? item1 : item2))
- .values()
- .stream()
- .collect(Collectors.toList());
+ statement != null ? runQuery(buildStatement(statement, parameters)).list() : new ArrayList<>();
LineageRelationshipArray relations = new LineageRelationshipArray();
neo4jResult.stream().skip(offset).limit(count).forEach(item -> {
String urn = item.values().get(2).asNode().get("urn").asString();
- String relationType = ((InternalRelationship) item.get(1).asList().get(0)).type().split("r_")[1];
- int numHops = item.get(1).size();
try {
- // Generate path from r in neo4jResult
- List pathFromRelationships =
- item.values().get(1).asList(Collections.singletonList(new ArrayList())).stream().map(t -> createFromString(
- // Get real upstream node/downstream node by direction
- ((InternalRelationship) t).get(direction == LineageDirection.UPSTREAM ? "startUrn" : "endUrn")
- .asString())).collect(Collectors.toList());
- if (direction == LineageDirection.UPSTREAM) {
- // For ui to show path correctly, reverse path for UPSTREAM direction
- Collections.reverse(pathFromRelationships);
- // Add missing original node to the end since we generate path from relationships
- pathFromRelationships.add(Urn.createFromString(item.values().get(0).asNode().get("urn").asString()));
- } else {
- // Add missing original node to the beginning since we generate path from relationships
- pathFromRelationships.add(0, Urn.createFromString(item.values().get(0).asNode().get("urn").asString()));
- }
+ final var path = item.get(1).asPath();
+ final List nodeListAsPath = StreamSupport.stream(
+ path.nodes().spliterator(), false)
+ .map(node -> createFromString(node.get("urn").asString()))
+ .collect(Collectors.toList());
+
+ final var firstRelationship = Optional.ofNullable(Iterables.getFirst(path.relationships(), null));
relations.add(new LineageRelationship().setEntity(Urn.createFromString(urn))
- .setType(relationType)
- .setDegree(numHops)
- .setPaths(new UrnArrayArray(new UrnArray(pathFromRelationships))));
+ // although firstRelationship should never be absent, provide "" as fallback value
+ .setType(firstRelationship.map(Relationship::type).orElse(""))
+ .setDegree(path.length())
+ .setPaths(new UrnArrayArray(new UrnArray(nodeListAsPath))));
} catch (URISyntaxException ignored) {
log.warn(String.format("Can't convert urn = %s, Error = %s", urn, ignored.getMessage()));
}
});
-
EntityLineageResult result = new EntityLineageResult().setStart(offset)
.setCount(relations.size())
.setRelationships(relations)
@@ -290,31 +273,104 @@ public EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageDi
return result;
}
- private String generateLineageStatement(@Nonnull Urn entityUrn, @Nonnull LineageDirection direction,
- GraphFilters graphFilters, int maxHops, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis) {
- String statement;
- final String allowedEntityTypes = String.join(" OR b:", graphFilters.getAllowedEntityTypes());
-
- final String multiHopMatchTemplateIndirect = "MATCH p = shortestPath((a {urn: '%s'})<-[r*1..%d]-(b)) ";
- final String multiHopMatchTemplateDirect = "MATCH p = shortestPath((a {urn: '%s'})-[r*1..%d]->(b)) ";
- // directionFilterTemplate should apply to all condition.
- final String multiHopMatchTemplate =
- direction == LineageDirection.UPSTREAM ? multiHopMatchTemplateIndirect : multiHopMatchTemplateDirect;
- final String fullQueryTemplate = generateFullQueryTemplate(multiHopMatchTemplate, startTimeMillis, endTimeMillis);
-
- if (startTimeMillis != null && endTimeMillis != null) {
- statement =
- String.format(fullQueryTemplate, startTimeMillis, endTimeMillis, entityUrn, maxHops, allowedEntityTypes,
- entityUrn);
- } else if (startTimeMillis != null) {
- statement = String.format(fullQueryTemplate, startTimeMillis, entityUrn, maxHops, allowedEntityTypes, entityUrn);
- } else if (endTimeMillis != null) {
- statement = String.format(fullQueryTemplate, endTimeMillis, entityUrn, maxHops, allowedEntityTypes, entityUrn);
+ private String getPathFindingLabelFilter(List entityNames) {
+ return entityNames.stream().map(x -> String.format("+%s", x)).collect(Collectors.joining("|"));
+ }
+
+ private String getPathFindingRelationshipFilter(@Nonnull List entityNames, @Nullable LineageDirection direction) {
+ // relationshipFilter supports mixing different directions for different relationship types,
+ // so simply transform the lineage registry entries into the filter's format
+ final var filterComponents = new HashSet();
+ for (final var entityName : entityNames) {
+ if (direction != null) {
+ for (final var edgeInfo : _lineageRegistry.getLineageRelationships(entityName, direction)) {
+ final var type = edgeInfo.getType();
+ if (edgeInfo.getDirection() == RelationshipDirection.INCOMING) {
+ filterComponents.add("<" + type);
+ } else {
+ filterComponents.add(type + ">");
+ }
+ }
+ } else {
+ // return disjunctive combination of edge types regardless of direction
+ for (final var direction1 : List.of(LineageDirection.UPSTREAM, LineageDirection.DOWNSTREAM)) {
+ for (final var edgeInfo : _lineageRegistry.getLineageRelationships(entityName, direction1)) {
+ filterComponents.add(edgeInfo.getType());
+ }
+ }
+ }
+ }
+ return String.join("|", filterComponents);
+ }
+
+ private Pair> generateLineageStatementAndParameters(
+ @Nonnull Urn entityUrn, @Nonnull LineageDirection direction,
+ GraphFilters graphFilters, int maxHops,
+ @Nullable Long startTimeMillis, @Nullable Long endTimeMillis) {
+
+ final var parameterMap = new HashMap