ONEcampaign · jm-rivera · Dec 19, 2025 · Dec 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,11 @@
 # Changelog for oda_reader
 
+## 1.4.1 (2025-12-19)
+- Extends bulk download auto-detection to support `.csv` files in addition to `.txt` files.
+
 ## 1.4.0 (2025-12-19)
 - Adds `bulk_download_dac2a()` function for bulk downloading the full DAC2A dataset.
-- Auto-detects file types (parquet vs txt/csv) in bulk downloads, removing the need for the `is_txt` parameter.
+- Auto-detects file types (parquet or txt) in bulk downloads, removing the need for the `is_txt` parameter.
 - Auto-detects CSV delimiters (comma, pipe, tab, semicolon) when reading txt files from bulk downloads.
 - Deprecates the `is_txt` parameter in `bulk_download_parquet()`. The parameter is still accepted for backward compatibility but emits a deprecation warning and will be removed in a future major release.
 - Adds pytest and pytest-mock to dev dependencies for improved testing support.

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "oda_reader"
-version = "1.4.0"
+version = "1.4.1"
 description = "A simple package to import ODA data from the OECD's API and AidData's database"
 readme = "README.md"
 license = "MIT"

diff --git a/src/oda_reader/download/download_tools.py b/src/oda_reader/download/download_tools.py
@@ -226,16 +226,16 @@ def _save_or_return_parquet_files_from_content(
     *,
     as_iterator: bool = False,
 ) -> list[pd.DataFrame] | None | typing.Iterator[pd.DataFrame]:
-    """Extract parquet or txt (CSV) files from a zip archive.
+    """Extract parquet, csv, or txt files from a zip archive.
 
     If `save_to_path` is provided the files are extracted and written
     to disk. Otherwise the contents are returned either as a list of
     `DataFrame` objects or, when `as_iterator` is `True`, as an iterator
     yielding one `DataFrame` per row group (parquet only).
 
-    The function auto-detects whether the zip contains parquet or txt files.
-    Txt files have their delimiter auto-detected (comma or pipe) and are
-    converted to parquet when saving.
+    The function auto-detects whether the zip contains parquet, csv, or txt files.
+    CSV/txt files have their delimiter auto-detected (comma, pipe, tab, or
+    semicolon) and are converted to parquet when saving.
 
     Args:
         response_content: Bytes or `Path` pointing to the zipped file.
@@ -252,7 +252,11 @@ def _save_or_return_parquet_files_from_content(
 
     with _open_zip(response_content=response_content) as z:
         parquet_files = [name for name in z.namelist() if name.endswith(".parquet")]
-        txt_files = [name for name in z.namelist() if name.endswith(".txt")]
+        csv_files = [
+            name
+            for name in z.namelist()
+            if name.endswith(".txt") or name.endswith(".csv")
+        ]
 
         # Determine which file type we're dealing with
         if parquet_files:
@@ -276,15 +280,18 @@ def _save_or_return_parquet_files_from_content(
             logger.info(f"Reading {len(parquet_files)} parquet files.")
             return [pd.read_parquet(z.open(file)) for file in parquet_files]
 
-        elif txt_files:
+        elif csv_files:
             if as_iterator:
-                raise ValueError("Streaming not supported for txt files.")
+                raise ValueError("Streaming not supported for csv/txt files.")
 
             if save_to_path:
                 save_to_path.mkdir(parents=True, exist_ok=True)
-                for file_name in txt_files:
+                for file_name in csv_files:
                     clean_name = (
-                        file_name.replace(".txt", ".parquet").lower().replace(" ", "_")
+                        file_name.replace(".txt", ".parquet")
+                        .replace(".csv", ".parquet")
+                        .lower()
+                        .replace(" ", "_")
                     )
                     logger.info(f"Saving {clean_name}")
                     with z.open(file_name) as f_in:
@@ -299,9 +306,9 @@ def _save_or_return_parquet_files_from_content(
                         ).to_parquet(save_to_path / clean_name)
                 return None
 
-            logger.info(f"Reading {len(txt_files)} txt files.")
+            logger.info(f"Reading {len(csv_files)} csv/txt files.")
             dfs = []
-            for file_name in txt_files:
+            for file_name in csv_files:
                 with z.open(file_name) as f_in:
                     delimiter = _detect_delimiter(f_in)
                     logger.info(f"Detected delimiter for {file_name}: '{delimiter}'")
@@ -317,14 +324,14 @@ def _save_or_return_parquet_files_from_content(
             return dfs
 
         else:
-            raise ValueError("No parquet or txt files found in the zip archive.")
+            raise ValueError("No parquet, csv, or txt files found in the zip archive.")
 
 
 def _save_or_return_parquet_files_from_txt_in_zip(
     response_content: bytes | Path,
     save_to_path: Path | str | None = None,
 ) -> list[pd.DataFrame] | None:
-    """Extract a `.txt` file from a zipped archive supplied as bytes or a file path.
+    """Extract csv or txt files from a zipped archive supplied as bytes or a file path.
 
     The file is read as CSV (with auto-detected delimiter) and optionally saved
     as a parquet file.
@@ -341,15 +348,22 @@ def _save_or_return_parquet_files_from_txt_in_zip(
     save_to_path = Path(save_to_path).expanduser().resolve() if save_to_path else None
 
     with _open_zip(response_content=response_content) as z:
-        # Find all txt files in the zip archive
-        files = [name for name in z.namelist() if name.endswith(".txt")]
+        # Find all csv/txt files in the zip archive
+        files = [
+            name
+            for name in z.namelist()
+            if name.endswith(".txt") or name.endswith(".csv")
+        ]
 
         # If save_to_path is provided, save the files to the path
         if save_to_path:
             save_to_path.mkdir(parents=True, exist_ok=True)
             for file_name in files:
                 clean_name = (
-                    file_name.replace(".txt", ".parquet").lower().replace(" ", "_")
+                    file_name.replace(".txt", ".parquet")
+                    .replace(".csv", ".parquet")
+                    .lower()
+                    .replace(" ", "_")
                 )
                 logger.info(f"Saving {clean_name}")
                 with z.open(file_name) as f_in:
@@ -501,7 +515,7 @@ def bulk_download_parquet(
     """Download data from the stats.oecd.org file download service.
 
     Certain data files are available as a bulk download. This function
-    downloads the files (parquet or txt/csv) and returns a single DataFrame.
+    downloads the files (parquet, csv, or txt) and returns a single DataFrame.
     The file type is auto-detected from the zip contents.
 
     Args:

diff --git a/tests/download/unit/test_download_tools.py b/tests/download/unit/test_download_tools.py
@@ -140,7 +140,7 @@ def _create_zip_with_parquet(self) -> bytes:
         return zip_buffer.getvalue()
 
     def _create_zip_with_txt(self, delimiter: str = ",") -> bytes:
-        """Create a zip file containing a txt/CSV file."""
+        """Create a zip file containing a txt file."""
         if delimiter == "|":
             csv_content = "col1|col2|col3\n1|2|3\n4|5|6"
         else:
@@ -151,6 +151,18 @@ def _create_zip_with_txt(self, delimiter: str = ",") -> bytes:
             z.writestr("test_data.txt", csv_content.encode("utf-8"))
         return zip_buffer.getvalue()
 
+    def _create_zip_with_csv(self, delimiter: str = ",") -> bytes:
+        """Create a zip file containing a .csv file."""
+        if delimiter == "|":
+            csv_content = "col1|col2|col3\n1|2|3\n4|5|6"
+        else:
+            csv_content = "col1,col2,col3\n1,2,3\n4,5,6"
+
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, "w") as z:
+            z.writestr("test_data.csv", csv_content.encode("utf-8"))
+        return zip_buffer.getvalue()
+
     def test_auto_detect_parquet_files(self):
         """Test that parquet files are auto-detected and read correctly."""
         zip_content = self._create_zip_with_parquet()
@@ -187,6 +199,48 @@ def test_auto_detect_txt_files_pipe(self):
         assert list(df.columns) == ["col1", "col2", "col3"]
         assert len(df) == 2
 
+    def test_auto_detect_csv_files(self):
+        """Test that .csv files are auto-detected and read correctly."""
+        zip_content = self._create_zip_with_csv(delimiter=",")
+
+        result = _save_or_return_parquet_files_from_content(zip_content)
+
+        assert result is not None
+        assert len(result) == 1
+        assert isinstance(result[0], pd.DataFrame)
+        assert list(result[0].columns) == ["col1", "col2", "col3"]
+        assert len(result[0]) == 2
+
+    def test_auto_detect_csv_files_pipe(self):
+        """Test that pipe-delimited .csv files are auto-detected."""
+        zip_content = self._create_zip_with_csv(delimiter="|")
+
+        result = _save_or_return_parquet_files_from_content(zip_content)
+
+        assert result is not None
+        assert len(result) == 1
+        df = result[0]
+        assert isinstance(df, pd.DataFrame)
+        assert list(df.columns) == ["col1", "col2", "col3"]
+        assert len(df) == 2
+
+    def test_save_csv_as_parquet_to_path(self, tmp_path):
+        """Test that .csv files are converted to parquet when saving."""
+        zip_content = self._create_zip_with_csv()
+
+        result = _save_or_return_parquet_files_from_content(
+            zip_content, save_to_path=tmp_path
+        )
+
+        assert result is None
+        saved_files = list(tmp_path.glob("*.parquet"))
+        assert len(saved_files) == 1
+        # Verify conversion to parquet with correct name
+        assert saved_files[0].suffix == ".parquet"
+        assert "test_data" in saved_files[0].name
+        df = pd.read_parquet(saved_files[0])
+        assert len(df) == 2
+
     def test_save_parquet_to_path(self, tmp_path):
         """Test saving parquet files to a path."""
         zip_content = self._create_zip_with_parquet()
@@ -224,7 +278,7 @@ def test_raises_on_empty_zip(self):
         with zipfile.ZipFile(zip_buffer, "w") as z:
             z.writestr("readme.md", "Not a data file")
 
-        with pytest.raises(ValueError, match="No parquet or txt files"):
+        with pytest.raises(ValueError, match="No parquet, csv, or txt files"):
             _save_or_return_parquet_files_from_content(zip_buffer.getvalue())
 
     def test_txt_iterator_raises(self):

diff --git a/uv.lock b/uv.lock