Skip to content

Commit 3ae2a8a

Browse files
authored
Add functions for extracting multi-part archive (#709)
1 parent c0490d9 commit 3ae2a8a

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

leafmap/common.py

+69
Original file line numberDiff line numberDiff line change
@@ -4516,6 +4516,7 @@ def download_files(
45164516
unzip=True,
45174517
overwrite=False,
45184518
subfolder=False,
4519+
multi_part=False,
45194520
):
45204521
"""Download files from URLs, including Google Drive shared URL.
45214522
@@ -4534,6 +4535,14 @@ def download_files(
45344535
unzip (bool, optional): Unzip the file. Defaults to True.
45354536
overwrite (bool, optional): Overwrite the file if it already exists. Defaults to False.
45364537
subfolder (bool, optional): Create a subfolder with the same name as the file. Defaults to False.
4538+
multi_part (bool, optional): If the file is a multi-part file. Defaults to False.
4539+
4540+
Examples:
4541+
4542+
files = ["sam_hq_vit_tiny.zip", "sam_hq_vit_tiny.z01", "sam_hq_vit_tiny.z02", "sam_hq_vit_tiny.z03"]
4543+
base_url = "https://github.com/opengeos/datasets/releases/download/models/"
4544+
urls = [base_url + f for f in files]
4545+
leafmap.download_files(urls, out_dir="models", multi_part=True)
45374546
"""
45384547

45394548
if out_dir is None:
@@ -4542,12 +4551,17 @@ def download_files(
45424551
if filenames is None:
45434552
filenames = [None] * len(urls)
45444553

4554+
filepaths = []
45454555
for url, output in zip(urls, filenames):
45464556
if output is None:
45474557
filename = os.path.join(out_dir, os.path.basename(url))
45484558
else:
45494559
filename = os.path.join(out_dir, output)
45504560

4561+
filepaths.append(filename)
4562+
if multi_part:
4563+
unzip = False
4564+
45514565
download_file(
45524566
url,
45534567
filename,
@@ -4564,6 +4578,14 @@ def download_files(
45644578
subfolder,
45654579
)
45664580

4581+
if multi_part:
4582+
archive = os.path.splitext(filename)[0] + ".zip"
4583+
out_dir = os.path.dirname(filename)
4584+
extract_archive(archive, out_dir)
4585+
4586+
for file in filepaths:
4587+
os.remove(file)
4588+
45674589

45684590
def download_folder(
45694591
url=None,
@@ -13251,3 +13273,50 @@ def convert_coordinates(x, y, source_crs, target_crs="epsg:4326"):
1325113273

1325213274
# Return the converted coordinates
1325313275
return lon, lat
13276+
13277+
13278+
def extract_archive(archive, outdir=None, **kwargs):
    """
    Extracts a multipart archive.

    This function uses the patoolib library to extract a multipart archive.
    If patoolib cannot be imported, install_package("patool") is called and
    the import is retried.
    If the archive name does not end with ".zip" (case-insensitive), ".zip"
    is appended to the archive name.
    Extraction is best-effort: if patoolib reports any error (for example,
    because the files already exist), a message including the underlying
    error is printed and the function returns without raising.

    Args:
        archive (str): The path to the archive file.
        outdir (str, optional): The directory where the archive should be
            extracted. If None or empty, the directory containing the
            archive is used. Defaults to None.
        **kwargs: Arbitrary keyword arguments for the patoolib.extract_archive function.

    Returns:
        None

    Example:

        files = ["sam_hq_vit_tiny.zip", "sam_hq_vit_tiny.z01", "sam_hq_vit_tiny.z02", "sam_hq_vit_tiny.z03"]
        base_url = "https://github.com/opengeos/datasets/releases/download/models/"
        urls = [base_url + f for f in files]
        leafmap.download_files(urls, out_dir="models", multi_part=True)

    """
    try:
        import patoolib
    except ImportError:
        install_package("patool")
        import patoolib

    # Case-insensitive check so that "model.ZIP" is not turned into "model.ZIP.zip".
    if not archive.lower().endswith(".zip"):
        archive = archive + ".zip"

    if not outdir:
        # Resolve to an absolute path first: os.path.dirname("model.zip") is "",
        # which is not a usable output directory.
        outdir = os.path.dirname(os.path.abspath(archive))

    try:
        patoolib.extract_archive(archive, outdir=outdir, **kwargs)
    except Exception as e:
        # Deliberately non-fatal (original behavior), but report the real cause
        # instead of only guessing that the files already exist.
        print("The unzipped files might already exist. Skipping extraction.")
        print(f"Underlying error: {e}")
        return

0 commit comments

Comments
 (0)