Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add possibility to parameterize S3 service URL #137

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cloudpathlib/s3/s3client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
profile_name: Optional[str] = None,
boto3_session: Optional["Session"] = None,
local_cache_dir: Optional[Union[str, os.PathLike]] = None,
endpoint_url: Optional[str] = None,
):
"""Class constructor. Sets up a boto3 [`Session`](
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html).
Expand All @@ -49,6 +50,8 @@ def __init__(
boto3_session (Optional[Session]): An already instantiated boto3 Session.
local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
for downloaded files. If None, will use a temporary directory.
endpoint_url (Optional[str]): S3 server endpoint URL to use for the constructed boto3 S3 resource and client.
Set it to access a custom-deployed S3-compatible object store such as MinIO, Ceph, or any other.
"""
if boto3_session is not None:
self.sess = boto3_session
Expand All @@ -60,8 +63,8 @@ def __init__(
botocore_session=botocore_session,
profile_name=profile_name,
)
self.s3 = self.sess.resource("s3")
self.client = self.sess.client("s3")
self.s3 = self.sess.resource("s3", endpoint_url=endpoint_url)
self.client = self.sess.client("s3", endpoint_url=endpoint_url)

super().__init__(local_cache_dir=local_cache_dir)

Expand Down
24 changes: 23 additions & 1 deletion docs/docs/authentication.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ cloud_path.client

All subsequent instances of that service's cloud paths (in the example, all subsequent `S3Path` instances) will reference the same client instance.

You can also explicitly instantiate a client instance. You will need to do so if you want to authenticate using any option other than the environment variables from the table in the previous section. (To see what those options are, check out the API documentation pages linked to in the table above.) You can then use that client instance's cloud path factory method, or pass it into a cloud path instantiation
You can also explicitly instantiate a client instance. You will need to do so if you want to authenticate using any option other than the environment variables from the table in the previous section. (To see what those options are, check out the API documentation pages linked to in the table above.) You can then use that client instance's cloud path factory method, or pass it into a cloud path instantiation.

```python
from cloudpathlib import S3Client
Expand All @@ -59,3 +59,25 @@ If you need a reference to the default client:
S3Client.get_default_client()
#> <cloudpathlib.s3.s3client.S3Client at 0x7feac3d1fb90>
```

## Accessing custom S3-compatible object stores
You may need to access a custom-deployed S3-compatible object store ([MinIO](https://min.io/), [Ceph](https://ceph.io/ceph-storage/object-storage/), or any other).
In such cases, the service endpoint will be different from the AWS object store endpoints (used by default).
To specify a custom endpoint address, you will need to manually instantiate `S3Client` with the `endpoint_url` parameter,
providing the http/https URL, including the port.

```python
from cloudpathlib import S3Client, CloudPath

# create a client pointing to the endpoint
client = S3Client(endpoint_url="http://my.s3.server:1234")
# option 1: use the client to create paths
cp1 = client.CloudPath("s3://cloudpathlib-test-bucket/")

# option 2: pass the client as keyword argument
cp2 = CloudPath("s3://cloudpathlib-test-bucket/", client=client)

# option 3: set this client as the default so it is used in any future paths
client.set_as_default_client()
cp3 = CloudPath("s3://cloudpathlib-test-bucket/")
```
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might set this up to show three scenarios:

from cloudpathlib import S3Client, CloudPath

# create a client pointing to the endpoint
client = S3Client(endpoint_url="http://my.s3.server:1234")

# option 1: use the client to create paths
cp1 = client.CloudPath("s3://cloudpathlib-test-bucket/")

# option 2: pass the client as keyword argument
cp2 = CloudPath("s3://cloudpathlib-test-bucket/", client=client)

# option3: set this client as the default so it is used in any future paths
client.set_as_default_client()
cp3 = CloudPath("s3://cloudpathlib-test-bucket/")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done here fc113dc

66 changes: 65 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,22 @@ class CloudProviderTestRig:
"""Class that holds together the components needed to test a cloud implementation."""

def __init__(
self, path_class: type, client_class: type, drive: str = "drive", test_dir: str = ""
self,
path_class: type,
client_class: type,
drive: str = "drive",
test_dir: str = "",
**client_kwargs: dict,
):
"""
Args:
path_class (type): CloudPath subclass
client_class (type): Client subclass
client_kwargs (dict): Keyword arguments passed to the client class when instantiating the client
"""
self.path_class = path_class
self.client_class = client_class
self.client_kwargs = client_kwargs
self.drive = drive
self.test_dir = test_dir

Expand Down Expand Up @@ -212,6 +219,63 @@ def s3_rig(request, monkeypatch, assets_dir):
bucket.objects.filter(Prefix=test_dir).delete()


@fixture()
def custom_s3_rig(request, monkeypatch, assets_dir):
Copy link
Member

@pjbull pjbull Mar 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a docstring that this is used for testing things like MinIO/ceph

Copy link
Contributor Author

@YevheniiSemendiak YevheniiSemendiak Mar 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done here 762c884

"""
Custom S3 rig used to test integrations with non-AWS, S3-compatible object stores such as
- MinIO (https://min.io/)
- Ceph (https://ceph.io/ceph-storage/object-storage/)
- others
"""
drive = os.getenv("CUSTOM_S3_BUCKET", "bucket")
custom_endpoint_url = os.getenv("CUSTOM_S3_ENDPOINT")
custom_key_id = os.getenv("CUSTOM_S3_KEY_ID")
custom_secret_key = os.getenv("CUSTOM_S3_SECRET_KEY")
test_dir = create_test_dir_name(request)

if os.getenv("USE_LIVE_CLOUD") == "1":
# Upload test assets
s3 = boto3.resource(
"s3",
aws_access_key_id=custom_key_id,
aws_secret_access_key=custom_secret_key,
endpoint_url=custom_endpoint_url,
)
bucket = s3.Bucket(drive)
test_files = [
f for f in assets_dir.glob("**/*") if f.is_file() and f.name not in UPLOAD_IGNORE_LIST
]
for test_file in test_files:
bucket.upload_file(
str(test_file),
str(f"{test_dir}/{PurePosixPath(test_file.relative_to(assets_dir))}"),
)
else:
# Mock cloud SDK
monkeypatch.setattr(
cloudpathlib.s3.s3client,
"Session",
mocked_session_class_factory(test_dir),
)

rig = CloudProviderTestRig(
path_class=S3Path,
client_class=S3Client,
drive=drive,
test_dir=test_dir,
endpoint_url=custom_endpoint_url,
aws_access_key_id=custom_key_id,
aws_secret_access_key=custom_secret_key,
)

rig.client_class(**rig.client_kwargs).set_as_default_client() # set default client

yield rig

rig.client_class._default_client = None # reset default client
bucket.objects.filter(Prefix=test_dir).delete()


@fixture()
def local_azure_rig(request, monkeypatch, assets_dir):
drive = os.getenv("LIVE_AZURE_CONTAINER", "container")
Expand Down
4 changes: 2 additions & 2 deletions tests/mock_clients/mock_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def __init__(self, *args, **kwargs):
def __del__(self):
self.tmp.cleanup()

def resource(self, item):
def resource(self, item, endpoint_url):
return MockBoto3Resource(self.tmp_path)

def client(self, item):
def client(self, item, endpoint_url):
return MockBoto3Client(self.tmp_path)

return MockBoto3Session
Expand Down