From 2d83ad9ea7411d283209ce11b8e1f889421dcefc Mon Sep 17 00:00:00 2001
From: Anjali Ratnam
Date: Tue, 1 Jul 2025 08:34:13 -0700
Subject: [PATCH 1/3] rm_files fix

---
Review notes (not part of the commit message):
- Line breaks and indentation in this series were restored from a
  whitespace-collapsed copy. Hunk line counts were re-checked, but the
  indentation depth of the adlfs/spec.py context lines is an assumption;
  verify it against the target file (or apply with --ignore-whitespace).

 adlfs/spec.py            |  2 +-
 adlfs/tests/test_spec.py | 69 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/adlfs/spec.py b/adlfs/spec.py
index 90916e3c..f6a779e3 100644
--- a/adlfs/spec.py
+++ b/adlfs/spec.py
@@ -1276,7 +1276,7 @@ async def _rm_files(
         for file in file_paths:
             self.invalidate_cache(self._parent(file))
 
-    sync_wrapper(_rm_files)
+    rm_files = sync_wrapper(_rm_files)
 
     async def _separate_directory_markers_for_non_empty_directories(
         self, file_paths: typing.Iterable[str]
diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py
index 91f5d0df..886c1943 100644
--- a/adlfs/tests/test_spec.py
+++ b/adlfs/tests/test_spec.py
@@ -10,6 +10,8 @@
 import numpy as np
 import pandas as pd
 import pytest
+from azure.core.exceptions import ResourceNotFoundError
+from azure.storage.blob.aio import BlobServiceClient as AIOBlobServiceClient
 from packaging.version import parse as parse_version
 from pandas.testing import assert_frame_equal
 
@@ -2140,3 +2142,70 @@ def test_blobfile_default_blocksize(storage):
         "data/root/a/file.txt",
     )
     assert f.blocksize == 50 * 2**20
+
+
+def test_rm_files(storage):
+    fs = AzureBlobFileSystem(
+        account_name=storage.account_name,
+        connection_string=CONN_STR,
+    )
+    file_list = [
+        "top_file.txt",
+        "root/a/file.txt",
+        "root/a1/file1.txt",
+    ]
+
+    fs.rm_files("data", file_list)
+    for file in file_list:
+        with pytest.raises(FileNotFoundError):
+            fs.ls(f"data/{file}")
+
+
+def test_rm_files_nonempty_directory_marker(storage):
+    fs = AzureBlobFileSystem(
+        account_name=storage.account_name,
+        connection_string=CONN_STR,
+    )
+
+    with pytest.raises(ResourceNotFoundError):
+        fs.rm_files("data", ["root/a/"])
+
+    assert fs.ls("data/root/a/") == ["data/root/a/file.txt"]
+
+
+def test_rm_files_delete_directory_markers(storage, mocker):
+    mock_container = mocker.AsyncMock()
+    mock_container.delete_blob = mocker.AsyncMock(return_value=None)
+    mock_get_container_client = mocker.AsyncMock()
+    mock_get_container_client.__aenter__.return_value = mock_container
+    mock_get_container_client.__aexit__.return_value = None
+    mocker.patch.object(
+        AIOBlobServiceClient,
+        "get_container_client",
+        return_value=mock_get_container_client,
+    )
+    fs = AzureBlobFileSystem(
+        account_name=storage.account_name,
+        connection_string=CONN_STR,
+    )
+
+    files = [blob.name for blob in storage.get_container_client("data").list_blobs()]
+    directory_markers = [
+        "root/a/",
+        "root/a1/",
+        "root/b/",
+        "root/c/",
+        "root/d/",
+        "root/e+f/",
+    ]
+
+    mocker.patch.object(
+        fs,
+        "_separate_directory_markers_for_non_empty_directories",
+        return_value=(files, directory_markers),
+    )
+
+    fs.rm_files("data", files)
+    expected_calls = [mocker.call(dir) for dir in reversed(directory_markers)]
+    actual_calls = mock_container.delete_blob.call_args_list[-len(directory_markers) :]
+    assert actual_calls == expected_calls

From 0c62e0043797f58c760127b1a1eca9dfca912a92 Mon Sep 17 00:00:00 2001
From: Anjali Ratnam
Date: Wed, 20 Aug 2025 16:53:09 -0700
Subject: [PATCH 2/3] added rm_file

---
 CHANGELOG.md             |  1 +
 adlfs/spec.py            | 27 ++++++++++++++++-
 adlfs/tests/test_spec.py | 63 +++++-----------------------------------
 3 files changed, 35 insertions(+), 56 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e6e2ba2..191baa82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Unreleased
 - The block size is now used for partitioned uploads. Previously, 1 GiB was used for each uploaded block irrespective of the block size
 - Updated default block size to be 50 MiB. Set `blocksize` for `AzureBlobFileSystem` or `block_size` when opening `AzureBlobFile` to revert back to 5 MiB default.
 - `AzureBlobFile` now inherits the block size from `AzureBlobFileSystem` when fs.open() is called and a block_size is not passed in.
+- Added `AzureBlobFileSystem.rm_file()`
 
 
 2024.12.0
diff --git a/adlfs/spec.py b/adlfs/spec.py
index f6a779e3..cf47a957 100644
--- a/adlfs/spec.py
+++ b/adlfs/spec.py
@@ -1276,7 +1276,32 @@ async def _rm_files(
         for file in file_paths:
             self.invalidate_cache(self._parent(file))
 
-    rm_files = sync_wrapper(_rm_files)
+    sync_wrapper(_rm_files)
+
+    async def _rm_file(
+        self, path: typing.Union[str, typing.List[str]], delimiter: str = "/", **kwargs
+    ):
+        """Delete a file.
+
+        Parameters
+        ----------
+        path: str
+            File to delete.
+        """
+        container_name, p, _ = self.split_path(path, delimiter=delimiter)
+        try:
+            if p != "":
+                await self._rm_files(container_name, [p.rstrip(delimiter)])
+            else:
+                await self._rmdir(container_name)
+        except ResourceNotFoundError:
+            pass
+        except FileNotFoundError:
+            pass
+        except Exception as e:
+            raise RuntimeError("Failed to remove %s for %s", path, e) from e
+
+    rm_file = sync_wrapper(_rm_file)
 
     async def _separate_directory_markers_for_non_empty_directories(
         self, file_paths: typing.Iterable[str]
diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py
index 886c1943..aa21b49c 100644
--- a/adlfs/tests/test_spec.py
+++ b/adlfs/tests/test_spec.py
@@ -10,8 +10,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-from azure.core.exceptions import ResourceNotFoundError
-from azure.storage.blob.aio import BlobServiceClient as AIOBlobServiceClient
 from packaging.version import parse as parse_version
 from pandas.testing import assert_frame_equal
 
@@ -2144,68 +2142,23 @@ def test_blobfile_default_blocksize(storage):
     assert f.blocksize == 50 * 2**20
 
 
-def test_rm_files(storage):
+def test_rm_file(storage):
     fs = AzureBlobFileSystem(
         account_name=storage.account_name,
         connection_string=CONN_STR,
     )
-    file_list = [
-        "top_file.txt",
-        "root/a/file.txt",
-        "root/a1/file1.txt",
-    ]
+    path = "data/top_file.txt"
 
-    fs.rm_files("data", file_list)
-    for file in file_list:
-        with pytest.raises(FileNotFoundError):
-            fs.ls(f"data/{file}")
+    fs.rm_file(path)
+    with pytest.raises(FileNotFoundError):
+        fs.ls(path)
 
 
-def test_rm_files_nonempty_directory_marker(storage):
+def test_rm_file_nonempty_directory(storage):
     fs = AzureBlobFileSystem(
         account_name=storage.account_name,
         connection_string=CONN_STR,
     )
-
-    with pytest.raises(ResourceNotFoundError):
-        fs.rm_files("data", ["root/a/"])
-
+    path = "data/root/a/"
+    fs.rm_file(path)
     assert fs.ls("data/root/a/") == ["data/root/a/file.txt"]
-
-
-def test_rm_files_delete_directory_markers(storage, mocker):
-    mock_container = mocker.AsyncMock()
-    mock_container.delete_blob = mocker.AsyncMock(return_value=None)
-    mock_get_container_client = mocker.AsyncMock()
-    mock_get_container_client.__aenter__.return_value = mock_container
-    mock_get_container_client.__aexit__.return_value = None
-    mocker.patch.object(
-        AIOBlobServiceClient,
-        "get_container_client",
-        return_value=mock_get_container_client,
-    )
-    fs = AzureBlobFileSystem(
-        account_name=storage.account_name,
-        connection_string=CONN_STR,
-    )
-
-    files = [blob.name for blob in storage.get_container_client("data").list_blobs()]
-    directory_markers = [
-        "root/a/",
-        "root/a1/",
-        "root/b/",
-        "root/c/",
-        "root/d/",
-        "root/e+f/",
-    ]
-
-    mocker.patch.object(
-        fs,
-        "_separate_directory_markers_for_non_empty_directories",
-        return_value=(files, directory_markers),
-    )
-
-    fs.rm_files("data", files)
-    expected_calls = [mocker.call(dir) for dir in reversed(directory_markers)]
-    actual_calls = mock_container.delete_blob.call_args_list[-len(directory_markers) :]
-    assert actual_calls == expected_calls

From 8728170bdf369a352bec4b0d200641d8e91a1e3c Mon Sep 17 00:00:00 2001
From: Anjali Ratnam
Date: Fri, 22 Aug 2025 15:05:00 -0700
Subject: [PATCH 3/3] updates

---
Review notes (not part of the commit message):
- _rm_file now formats its RuntimeError message with an f-string. The
  previous form passed logging-style "%s" arguments to the exception
  constructor, so the message was never interpolated.
- _rm_file now invalidates the parent directory cache after the delete,
  matching what _rm_files already does, so a cached listing cannot keep
  reporting the removed blob.
- The post-image blob id on the adlfs/spec.py index line was not
  regenerated for these two fixes.

 adlfs/spec.py            | 19 ++++++++++---------
 adlfs/tests/test_spec.py | 10 ----------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/adlfs/spec.py b/adlfs/spec.py
index cf47a957..8b4b9fb7 100644
--- a/adlfs/spec.py
+++ b/adlfs/spec.py
@@ -1278,9 +1278,7 @@ async def _rm_files(
 
     sync_wrapper(_rm_files)
 
-    async def _rm_file(
-        self, path: typing.Union[str, typing.List[str]], delimiter: str = "/", **kwargs
-    ):
+    async def _rm_file(self, path: str, **kwargs):
         """Delete a file.
 
         Parameters
@@ -1288,18 +1286,21 @@ async def _rm_file(
         path: str
             File to delete.
         """
-        container_name, p, _ = self.split_path(path, delimiter=delimiter)
+        container_name, p, _ = self.split_path(path)
         try:
-            if p != "":
-                await self._rm_files(container_name, [p.rstrip(delimiter)])
-            else:
-                await self._rmdir(container_name)
+            async with self.service_client.get_container_client(
+                container=container_name
+            ) as cc:
+                await cc.delete_blob(p)
         except ResourceNotFoundError:
             pass
         except FileNotFoundError:
             pass
         except Exception as e:
-            raise RuntimeError("Failed to remove %s for %s", path, e) from e
+            raise RuntimeError(f"Failed to remove {path} for {e}") from e
+        # Drop the cached parent listing so later ls()/info() calls do not
+        # report the blob that was just deleted (mirrors _rm_files).
+        self.invalidate_cache(self._parent(path))
 
     rm_file = sync_wrapper(_rm_file)
 
diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py
index aa21b49c..09e92ce5 100644
--- a/adlfs/tests/test_spec.py
+++ b/adlfs/tests/test_spec.py
@@ -2152,13 +2152,3 @@ def test_rm_file(storage):
     fs.rm_file(path)
     with pytest.raises(FileNotFoundError):
         fs.ls(path)
-
-
-def test_rm_file_nonempty_directory(storage):
-    fs = AzureBlobFileSystem(
-        account_name=storage.account_name,
-        connection_string=CONN_STR,
-    )
-    path = "data/root/a/"
-    fs.rm_file(path)
-    assert fs.ls("data/root/a/") == ["data/root/a/file.txt"]