From 5aa9df212b7602d489289b13412e67db8da252ab Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 18 May 2022 17:21:29 +0300 Subject: [PATCH 01/10] add username to extra cache keys when impersonation is enabled. --- superset/connectors/sqla/models.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6dde259f9830c..95b80e027d9c3 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -84,6 +84,7 @@ get_virtual_table_metadata, validate_adhoc_subquery, ) +from superset.databases.utils import make_url_safe from superset.datasets.models import Dataset as NewDataset from superset.db_engine_specs.base import BaseEngineSpec, CTE_ALIAS, TimestampExpression from superset.exceptions import ( @@ -2026,6 +2027,15 @@ class and any keys added via `ExtraCache`. if self.has_extra_cache_key_calls(query_obj): sqla_query = self.get_sqla_query(**query_obj) extra_cache_keys += sqla_query.extra_cache_keys + + sqlalchemy_url = make_url_safe(self.database.sqlalchemy_uri_decrypted) + + if effective_user := self.database.get_effective_user(sqlalchemy_url): + logger.debug("User impersonation is enabled for database '%s', adding " + "current username to '%s' datasource's extra cache keys", + self.database_name, self.datasource_name) + extra_cache_keys.append(effective_user) + return extra_cache_keys @property From 1d47f76a38f444f2f44e4d235cbd54feeed99666 Mon Sep 17 00:00:00 2001 From: Samira-El Date: Fri, 20 May 2022 17:17:21 +0300 Subject: [PATCH 02/10] don't put effective_user in extra_cache_key --- superset/connectors/sqla/models.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 9682550e3afd5..4a3c022eb6e11 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -2036,14 +2036,6 @@ class and any keys added via `ExtraCache`. sqla_query = self.get_sqla_query(**query_obj) extra_cache_keys += sqla_query.extra_cache_keys - sqlalchemy_url = make_url_safe(self.database.sqlalchemy_uri_decrypted) - - if effective_user := self.database.get_effective_user(sqlalchemy_url): - logger.debug("User impersonation is enabled for database '%s', adding " - "current username to '%s' datasource's extra cache keys", - self.database_name, self.datasource_name) - extra_cache_keys.append(effective_user) - return extra_cache_keys @property From 5890a52fc5adf2beeadc2dc629ff2040e6a45f56 Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 25 May 2022 16:35:27 +0300 Subject: [PATCH 03/10] get_impersonation_key method in engine_spec class to construct an impersonation key --- superset/db_engine_specs/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 6a2ddc5e5c3f4..85817c70d4e2b 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -1537,6 +1537,18 @@ def cancel_query( # pylint: disable=unused-argument def parse_sql(cls, sql: str) -> List[str]: return [str(s).strip(" ;") for s in sqlparse.parse(sql)] + @classmethod + def get_impersonation_key( + cls, + username: Optional[str] + ) -> Any: + """ + Construct an impersonation key, by default it's the given username. + + :param username: logged in user's username + """ + return username + # schema for adding a database by providing parameters instead of the # full SQLAlchemy URI From f3cba778d2c3ab3ffc7b455a2ef423fb4a0132a2 Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 25 May 2022 16:36:04 +0300 Subject: [PATCH 04/10] pass datasource when creating query objects --- superset/common/query_context_factory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/superset/common/query_context_factory.py b/superset/common/query_context_factory.py index cb40b9540818c..215afc4a66ab3 100644 --- a/superset/common/query_context_factory.py +++ b/superset/common/query_context_factory.py @@ -58,7 +58,9 @@ def create( result_type = result_type or ChartDataResultType.FULL result_format = result_format or ChartDataResultFormat.JSON queries_ = [ - self._query_object_factory.create(result_type, **query_obj) + self._query_object_factory.create(result_type, + datasource=datasource, + **query_obj) for query_obj in queries ] cache_values = { From 9d781998f68acc6c98eca9121569d07947c4136e Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 25 May 2022 16:38:03 +0300 Subject: [PATCH 05/10] adding an impersonation key when construction cache key --- superset/common/query_object.py | 1 + 1 file changed, 1 insertion(+) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 40d37041b9166..af9de7d4d7511 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -43,6 +43,7 @@ is_adhoc_metric, json_int_dttm_ser, QueryObjectFilterClause, + get_username, ) from superset.utils.date_parser import parse_human_timedelta from superset.utils.hashing import md5_sha_from_dict From 69fbb363797ce8a44cd7a3c8a12078fecdbf82de Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 25 May 2022 16:38:54 +0300 Subject: [PATCH 06/10] add feature flag to control caching per user --- superset/common/query_object.py | 18 ++++++++++++++++++ superset/config.py | 3 +++ 2 files changed, 21 insertions(+) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index af9de7d4d7511..139151393a903 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -26,6 +26,7 @@ from flask_babel import gettext as _ from pandas import DataFrame +from superset import feature_flag_manager from superset.common.chart_data import ChartDataResultType from superset.exceptions import ( InvalidPostProcessingError, @@ -397,6 +398,23 @@ def cache_key(self, **extra: Any) -> str: if annotation_layers: cache_dict["annotation_layers"] = annotation_layers + # Add an impersonation key to cache if impersonation is enabled on the db + if feature_flag_manager.is_feature_enabled("CACHE_IMPERSONATION") and \ + self.datasource and hasattr(self.datasource, 'database') and \ + self.datasource.database.impersonate_user: + + username = get_username() + + if impersonation_key := self \ + .datasource \ + .database.db_engine_spec \ + .get_impersonation_key(username): + + logger.debug('Adding impersonation key to QueryObject cache dict: %s', + impersonation_key) + + cache_dict['impersonation_key'] = impersonation_key + return md5_sha_from_dict(cache_dict, default=json_int_dttm_ser, ignore_nan=True) def exec_post_processing(self, df: DataFrame) -> DataFrame: diff --git a/superset/config.py b/superset/config.py index c4af7bc8a57ee..4e2f18bb14a04 100644 --- a/superset/config.py +++ b/superset/config.py @@ -428,6 +428,9 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: # Apply RLS rules to SQL Lab queries. This requires parsing and manipulating the # query, and might break queries and/or allow users to bypass RLS. Use with care! "RLS_IN_SQLLAB": False, + # Enable caching per impersonation key (e.g username) in a datasource where user + # impersonation is enabled + "CACHE_IMPERSONATION": False, } # Feature flags may also be set via 'SUPERSET_FEATURE_' prefixed environment vars. From 01f8bb35344533f7d6898cde657a409b7bf263af Mon Sep 17 00:00:00 2001 From: Samira-El Date: Wed, 25 May 2022 16:51:28 +0300 Subject: [PATCH 07/10] revert changes --- superset/connectors/sqla/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 4a3c022eb6e11..60eff5e6304ab 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -84,7 +84,6 @@ get_virtual_table_metadata, validate_adhoc_subquery, ) -from superset.databases.utils import make_url_safe from superset.datasets.models import Dataset as NewDataset from superset.db_engine_specs.base import BaseEngineSpec, CTE_ALIAS, TimestampExpression from superset.exceptions import ( @@ -2035,7 +2034,6 @@ class and any keys added via `ExtraCache`. if self.has_extra_cache_key_calls(query_obj): sqla_query = self.get_sqla_query(**query_obj) extra_cache_keys += sqla_query.extra_cache_keys - return extra_cache_keys @property From 8315c21513c7c6740593d24801603ef1fb300e25 Mon Sep 17 00:00:00 2001 From: Samira-El Date: Thu, 26 May 2022 11:19:35 +0300 Subject: [PATCH 08/10] make precommit and pylint happy --- superset/common/query_context_factory.py | 6 ++--- superset/common/query_object.py | 31 ++++++++++++------------ superset/db_engine_specs/base.py | 5 +--- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/superset/common/query_context_factory.py b/superset/common/query_context_factory.py index 215afc4a66ab3..2056109bbff70 100644 --- a/superset/common/query_context_factory.py +++ b/superset/common/query_context_factory.py @@ -58,9 +58,9 @@ def create( result_type = result_type or ChartDataResultType.FULL result_format = result_format or ChartDataResultFormat.JSON queries_ = [ - self._query_object_factory.create(result_type, - datasource=datasource, - **query_obj) + self._query_object_factory.create( + result_type, datasource=datasource, **query_obj + ) for query_obj in queries ] cache_values = { diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 139151393a903..4517b10cf6ed9 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -41,10 +41,10 @@ find_duplicates, get_column_names, get_metric_names, + get_username, is_adhoc_metric, json_int_dttm_ser, QueryObjectFilterClause, - get_username, ) from superset.utils.date_parser import parse_human_timedelta from superset.utils.hashing import md5_sha_from_dict @@ -399,21 +399,22 @@ def cache_key(self, **extra: Any) -> str: cache_dict["annotation_layers"] = annotation_layers # Add an impersonation key to cache if impersonation is enabled on the db - if feature_flag_manager.is_feature_enabled("CACHE_IMPERSONATION") and \ - self.datasource and hasattr(self.datasource, 'database') and \ - self.datasource.database.impersonate_user: - - username = get_username() - - if impersonation_key := self \ - .datasource \ - .database.db_engine_spec \ - .get_impersonation_key(username): - - logger.debug('Adding impersonation key to QueryObject cache dict: %s', - impersonation_key) + if ( + feature_flag_manager.is_feature_enabled("CACHE_IMPERSONATION") + and self.datasource + and hasattr(self.datasource, "database") + and self.datasource.database.impersonate_user + ): + + if key := self.datasource.database.db_engine_spec.get_impersonation_key( + get_username() + ): + + logger.debug( + "Adding impersonation key to QueryObject cache dict: %s", key + ) - cache_dict['impersonation_key'] = impersonation_key + cache_dict["impersonation_key"] = key return md5_sha_from_dict(cache_dict, default=json_int_dttm_ser, ignore_nan=True) diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 85817c70d4e2b..95cbe70a818c4 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -1538,10 +1538,7 @@ def parse_sql(cls, sql: str) -> List[str]: return [str(s).strip(" ;") for s in sqlparse.parse(sql)] @classmethod - def get_impersonation_key( - cls, - username: Optional[str] - ) -> Any: + def get_impersonation_key(cls, username: Optional[str]) -> Any: """ Construct an impersonation key, by default it's the given username. From 0731669797916234419ce21833d692a0a0c6c47b Mon Sep 17 00:00:00 2001 From: Samira-El Date: Thu, 16 Jun 2022 10:29:11 +0300 Subject: [PATCH 09/10] pass a User instance --- superset/common/query_object.py | 3 ++- superset/db_engine_specs/base.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 4517b10cf6ed9..22c57d1e5be01 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -23,6 +23,7 @@ from pprint import pformat from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING +from flask import g from flask_babel import gettext as _ from pandas import DataFrame @@ -407,7 +408,7 @@ def cache_key(self, **extra: Any) -> str: ): if key := self.datasource.database.db_engine_spec.get_impersonation_key( - get_username() + getattr(g, "user", None) ): logger.debug( diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 95cbe70a818c4..b4f4ec25c451e 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -41,6 +41,7 @@ from apispec import APISpec from apispec.ext.marshmallow import MarshmallowPlugin from flask import current_app +from flask_appbuilder.security.sqla.models import User from flask_babel import gettext as __, lazy_gettext as _ from marshmallow import fields, Schema from marshmallow.validate import Range @@ -1538,13 +1539,15 @@ def parse_sql(cls, sql: str) -> List[str]: return [str(s).strip(" ;") for s in sqlparse.parse(sql)] @classmethod - def get_impersonation_key(cls, username: Optional[str]) -> Any: + def get_impersonation_key(cls, user: Optional[User]) -> Any: """ Construct an impersonation key, by default it's the given username. - :param username: logged in user's username + :param user: logged in user + + :returns: username if given user is not null """ - return username + return user.username if user else None # schema for adding a database by providing parameters instead of the From 92498a542d3337a78f1a89658dc4a2c92b956b11 Mon Sep 17 00:00:00 2001 From: Samira-El Date: Thu, 16 Jun 2022 10:33:46 +0300 Subject: [PATCH 10/10] remove unnecessary import --- superset/common/query_object.py | 1 - 1 file changed, 1 deletion(-) diff --git a/superset/common/query_object.py b/superset/common/query_object.py index 22c57d1e5be01..a8585fd47e055 100644 --- a/superset/common/query_object.py +++ b/superset/common/query_object.py @@ -42,7 +42,6 @@ find_duplicates, get_column_names, get_metric_names, - get_username, is_adhoc_metric, json_int_dttm_ser, QueryObjectFilterClause,