Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT-#7459: Add methods to get and set backend. #7460

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
FIX-#7461: Set backend correctly with environment variables.
Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
  • Loading branch information
sfc-gh-mvashishtha committed Mar 11, 2025
commit 71bafe72b21cc398690c6c2cf23b868f7e5765ce
158 changes: 147 additions & 11 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
@@ -41,25 +41,23 @@
varname: Optional[str] = None

@classmethod
def _get_raw_from_config(cls) -> str:
def _get_value_from_config(cls) -> Any:
"""
Read the value from environment variable.

Returns
-------
str
Config raw value.

Raises
------
TypeError
If `varname` is None.
KeyError
If value is absent.
Any
Config raw value if it's set, otherwise `_UNSET`.
"""
if cls.varname is None:
raise TypeError("varname should not be None")
return os.environ[cls.varname]
if cls.varname not in os.environ:
return _UNSET
raw = os.environ[cls.varname]
if not _TYPE_PARAMS[cls.type].verify(raw):
raise ValueError(f"Unsupported raw value: {raw}")

Check warning on line 59 in modin/config/envvars.py

Codecov / codecov/patch

modin/config/envvars.py#L59

Added line #L59 was not covered by tests
return _TYPE_PARAMS[cls.type].decode(raw)

@classmethod
def get_help(cls) -> str:
@@ -271,6 +269,43 @@
)
)

@classmethod
def get(cls) -> str:
    """
    Resolve and return the engine value.

    The value is looked up once and cached in ``cls._value``; later calls
    return the cached value.

    Returns
    -------
    str
        Engine value.
    """
    # The base get() only consults this variable's own config source, while
    # the engine may also be implied by the Backend variable, hence the
    # override.
    cls._warn_if_deprecated()

    if cls._value is _UNSET:
        # Read both sources up front so that each one is validated
        # regardless of which one ends up being used.
        from_engine_var = cls._get_value_from_config()
        from_backend_var = Backend._get_value_from_config()

        # Precedence: explicit Engine config, then the engine of the
        # configured Backend's execution, then the default engine.
        if from_engine_var is not _UNSET:
            resolved = from_engine_var
        elif from_backend_var is not _UNSET:
            resolved = Backend.get_execution_for_backend(from_backend_var).engine
        else:
            resolved = cls._get_default()
        cls._value = resolved

    return cls._value


class StorageFormat(EnvironmentVariable, type=str):
"""Engine to run on a single node of distribution."""
@@ -293,6 +328,43 @@
)
)

@classmethod
def get(cls) -> str:
    """
    Resolve and return the storage format value.

    The value is looked up once and cached in ``cls._value``; later calls
    return the cached value.

    Returns
    -------
    str
        Storage format value.
    """
    # The base get() only consults this variable's own config source, while
    # the storage format may also be implied by the Backend variable, hence
    # the override.
    cls._warn_if_deprecated()

    if cls._value is _UNSET:
        # Read both sources up front so that each one is validated
        # regardless of which one ends up being used.
        from_storage_format_var = cls._get_value_from_config()
        from_backend_var = Backend._get_value_from_config()

        # Precedence: explicit StorageFormat config, then the storage format
        # of the configured Backend's execution, then the default.
        if from_storage_format_var is not _UNSET:
            resolved = from_storage_format_var
        elif from_backend_var is not _UNSET:
            resolved = Backend.get_execution_for_backend(
                from_backend_var
            ).storage_format
        else:
            resolved = cls._get_default()
        cls._value = resolved

    return cls._value

varname = "MODIN_STORAGE_FORMAT"
default = "Pandas"
choices = ("Pandas", "Native")
@@ -420,6 +492,62 @@
)
return cls._EXECUTION_TO_BACKEND[execution]

@classmethod
def get_execution_for_backend(cls, backend: str) -> Execution:
"""
Get the execution for the given backend.

Parameters
----------
backend : str
Backend to get the execution for.

Returns
-------
execution : Execution
The execution for the given backend
"""
if backend not in cls._BACKEND_TO_EXECUTION:
raise ValueError(

Check warning on line 511 in modin/config/envvars.py

Codecov / codecov/patch

modin/config/envvars.py#L510-L511

Added lines #L510 - L511 were not covered by tests
f"Backend '{backend}' has no known execution. Please "
+ "register an execution for it with Backend.register_backend()."
)
return cls._BACKEND_TO_EXECUTION[backend]

Check warning on line 515 in modin/config/envvars.py

Codecov / codecov/patch

modin/config/envvars.py#L515

Added line #L515 was not covered by tests

@classmethod
def get(cls) -> str:
    """
    Resolve and return the backend.

    The value is looked up once and cached in ``cls._value``; later calls
    return the cached value.

    Returns
    -------
    str
        Backend.
    """
    # The base get() only consults this variable's own config source, while
    # the backend may have to be derived from the Engine and StorageFormat
    # values, hence the override.
    cls._warn_if_deprecated()

    if cls._value is _UNSET:
        resolved = Backend._get_value_from_config()
        if resolved is _UNSET:
            # No explicit Backend config: map the current execution
            # (storage format + engine) to its backend.
            storage_format = StorageFormat.get()
            engine = Engine.get()
            resolved = cls.get_backend_for_execution(
                Execution(storage_format=storage_format, engine=engine)
            )
        cls._value = resolved

    return cls._value


Backend.register_backend("Ray", Execution("Pandas", "Ray"))
Backend.register_backend("Dask", Execution("Pandas", "Dask"))
@@ -1136,5 +1264,13 @@
FutureWarning,
)

if Backend.varname in os.environ and (
Engine.varname in os.environ or StorageFormat.varname in os.environ
):
# Handling this case is tricky, in part because the combination of
# Backend and Engine/StorageFormat may be invalid. For now just
# disallow it.
raise Exception("Can't specify both execution and backend in environment")

Check warning on line 1273 in modin/config/envvars.py

Codecov / codecov/patch

modin/config/envvars.py#L1273

Added line #L1273 was not covered by tests


_check_vars()
38 changes: 16 additions & 22 deletions modin/config/pubsub.py
Original file line number Diff line number Diff line change
@@ -224,19 +224,22 @@
_deprecation_descriptor: Optional[DeprecationDescriptor] = None

@classmethod
def _get_raw_from_config(cls) -> str:
def _warn_if_deprecated(cls) -> None:
"""Warn that the variable is deprecated if it has a deprecation descriptor."""
if cls._deprecation_descriptor is not None:
warnings.warn(

Check warning on line 230 in modin/config/pubsub.py

Codecov / codecov/patch

modin/config/pubsub.py#L230

Added line #L230 was not covered by tests
cls._deprecation_descriptor.deprecation_message(), FutureWarning
)

@classmethod
def _get_value_from_config(cls) -> Any:
"""
Read the value from config storage.

Returns
-------
str
Config raw value.

Raises
------
KeyError
If value is absent.
Any
Config raw value if it's set, otherwise `_UNSET`.

Notes
-----
@@ -332,21 +335,15 @@
Any
Decoded and verified config value.
"""
if cls._deprecation_descriptor is not None:
warnings.warn(
cls._deprecation_descriptor.deprecation_message(), FutureWarning
)
cls._warn_if_deprecated()
if cls._value is _UNSET:
# get the value from env
try:
raw = cls._get_raw_from_config()
except KeyError:
config_value = cls._get_value_from_config()
if config_value is _UNSET:
cls._value = cls._get_default()
cls._value_source = ValueSource.DEFAULT
else:
if not _TYPE_PARAMS[cls.type].verify(raw):
raise ValueError(f"Unsupported raw value: {raw}")
cls._value = _TYPE_PARAMS[cls.type].decode(raw)
cls._value = config_value
cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE
return cls._value

@@ -360,10 +357,7 @@
value : Any
Config value to set.
"""
if cls._deprecation_descriptor is not None:
warnings.warn(
cls._deprecation_descriptor.deprecation_message(), FutureWarning
)
cls._warn_if_deprecated()
cls._check_callbacks(cls._put_nocallback(value))
cls._value_source = ValueSource.SET_BY_USER

Loading
Loading