Skip to content

Feature range parameter support - Unit Tests #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions codebeaver.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from: pytest
# This file was generated automatically by CodeBeaver based on your repository. Learn how to customize it here: https://docs.codebeaver.ai/open-source/codebeaver-yml/
85 changes: 85 additions & 0 deletions mlxtend/data/tests/test_iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,88 @@ def test_iris_invalid_choice():
with pytest.raises(ValueError) as excinfo:
iris_data(version="bla")
assert excinfo.value.message == "version must be 'uci' or 'corrected'."

def test_iris_data_invalid_version_type():
    """Passing ``None`` as the version must be rejected with a ValueError."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=None)
    # Compare against the stringified exception rather than an attribute,
    # so the check only depends on the message text.
    message = str(excinfo.value)
    assert expected_fragment in message

def test_iris_data_dtype_and_shape():
    """Check container type, shape and dtype of both outputs for each version."""
    accepted_float_dtypes = [np.float64, np.float32]
    for version in ("uci", "corrected"):
        features, labels = iris_data(version=version)
        # Both return values must be NumPy arrays.
        assert isinstance(features, np.ndarray)
        assert isinstance(labels, np.ndarray)
        # 150 samples with 4 features each, and one label per sample.
        assert features.shape == (150, 4)
        assert labels.shape == (150,)
        # Features are floating point; labels may be any integer dtype.
        assert features.dtype in accepted_float_dtypes
        assert np.issubdtype(labels.dtype, np.integer)

def test_iris_data_file_not_found(monkeypatch):
    """An IOError raised by ``np.genfromtxt`` must surface from ``iris_data``."""
    error_text = "File not found"

    def raise_io_error(*args, **kwargs):
        raise IOError(error_text)

    # Stand in for the real loader to simulate a missing data file.
    monkeypatch.setattr(np, "genfromtxt", raise_io_error)
    with pytest.raises(IOError) as excinfo:
        iris_data(version="uci")
    assert error_text in str(excinfo.value)
def test_iris_data_empty_version(monkeypatch):
    """An empty version string must be rejected with a ValueError."""
    with pytest.raises(ValueError) as excinfo:
        iris_data(version="")
    # The message has to name the two accepted version strings.
    raised_message = str(excinfo.value)
    assert "version must be 'uci' or 'corrected'" in raised_message

def test_iris_data_uppercase_version(monkeypatch):
    """The version check is expected to be case-sensitive: 'UCI' must raise ValueError."""
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version="UCI")
    # The message has to name the two accepted (lowercase) version strings.
    assert expected_fragment in str(excinfo.value)

def test_iris_data_incorrect_shape(monkeypatch):
    """Expect an IndexError from the 'corrected' version when too few rows are loaded.

    ``np.genfromtxt`` is replaced by a stub returning a 30x5 array of zeros
    in place of the expected 150-row dataset.
    """
    too_few_rows = (30, 5)

    def short_genfromtxt(*args, **kwargs):
        # A fresh zero-filled array on every call, far shorter than 150 rows.
        return np.zeros(too_few_rows)

    monkeypatch.setattr(np, "genfromtxt", short_genfromtxt)
    with pytest.raises(IndexError):
        iris_data(version="corrected")

def test_iris_data_returns_distinct_arrays():
    """Test that consecutive ``iris_data`` calls return independent arrays.

    Distinct Python objects alone do not guarantee independence: two
    different ndarray objects can be views onto one buffer, in which case a
    write through one would still be visible through the other. The test
    therefore checks both object identity and buffer overlap.
    """
    iris_x1, iris_y1 = iris_data()
    iris_x2, iris_y2 = iris_data()
    # The returned arrays must not be the same objects in memory.
    assert iris_x1 is not iris_x2
    assert iris_y1 is not iris_y2
    # They must not alias the same underlying data either; otherwise
    # modifying one result would silently change the other.
    assert not np.shares_memory(iris_x1, iris_x2)
    assert not np.shares_memory(iris_y1, iris_y2)
def test_iris_data_numeric_version():
    """A numeric (non-string) version such as ``123`` must raise a ValueError."""
    bad_version = 123
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=bad_version)
    # The message has to name the two accepted version strings.
    raised_message = str(excinfo.value)
    assert "version must be 'uci' or 'corrected'" in raised_message

def test_iris_data_whitespace_version():
    """A version padded with whitespace (' uci ') must raise a ValueError."""
    padded_version = " uci "
    expected_fragment = "version must be 'uci' or 'corrected'"
    with pytest.raises(ValueError) as excinfo:
        iris_data(version=padded_version)
    # The message has to name the two accepted version strings.
    assert expected_fragment in str(excinfo.value)

def test_iris_data_empty_file(monkeypatch):
    """Expect an IndexError when the stubbed ``np.genfromtxt`` yields an empty array.

    The empty array stands in for an empty data file.
    """

    def empty_genfromtxt(*args, **kwargs):
        # A fresh zero-length 1-D array on every call.
        return np.array([])

    monkeypatch.setattr(np, "genfromtxt", empty_genfromtxt)
    with pytest.raises(IndexError):
        iris_data(version="uci")
140 changes: 140 additions & 0 deletions tests/test_autompg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import numpy as np
import pytest
import os
from mlxtend.data.autompg import autompg_data

def test_autompg_data_returns_correct_arrays(monkeypatch):
    """Verify the feature/target split of ``autompg_data`` on a small stubbed dataset."""
    # Three samples with six columns each: five feature columns plus a target.
    stub = np.array([
        [1, 2, 3, 4, 5, 6],
        [7, 8, 9, 10, 11, 12],
        [13, 14, 15, 16, 17, 18],
    ])

    def fake_genfromtxt(fname, delimiter):
        # Ignore the requested file and delimiter; always hand back the stub.
        return stub

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    # Features are every column but the last; the target is the last column.
    np.testing.assert_array_equal(features, stub[:, :-1])
    np.testing.assert_array_equal(target, stub[:, -1])

def test_autompg_data_empty(monkeypatch):
    """With zero rows loaded, ``autompg_data`` must return correspondingly empty arrays."""
    # No samples, five columns (four features plus one target).
    no_rows = np.empty((0, 5))

    def fake_genfromtxt(fname, delimiter):
        return no_rows

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    # Dropping the last of five columns leaves four feature columns.
    assert features.shape == (0, 4)
    assert target.shape == (0,)

def test_autompg_data_invalid_input(monkeypatch):
    """A 1-dimensional array from the loader must make ``autompg_data`` raise IndexError."""
    # Only one axis, so a two-axis column split cannot be applied to it.
    flat = np.array([1, 2, 3, 4, 5])

    def fake_genfromtxt(fname, delimiter):
        return flat

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    with pytest.raises(IndexError):
        autompg_data()
def test_autompg_data_single_sample(monkeypatch):
    """The feature/target split must also work for a dataset holding one row."""
    # One sample with six columns: five features plus one target.
    single_row = np.array([[10, 20, 30, 40, 50, 60]])

    def fake_genfromtxt(fname, delimiter):
        return single_row

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    np.testing.assert_array_equal(features, single_row[:, :-1])
    np.testing.assert_array_equal(target, single_row[:, -1])

def test_autompg_data_one_column(monkeypatch):
    """Check output shapes when the loaded data consists of a single column.

    The only column is the target, so the feature matrix is expected to keep
    its rows but have zero columns, while the target stays one-dimensional.
    """
    target_only = np.array([[100], [200], [300]])

    def fake_genfromtxt(fname, delimiter):
        return target_only

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    # No feature columns remain once the single (target) column is removed.
    assert features.shape == (3, 0)
    # One target value per row.
    assert target.shape == (3,)

def test_autompg_data_none(monkeypatch):
    """If the loader yields ``None`` instead of data, a TypeError is expected."""

    def fake_genfromtxt(fname, delimiter):
        return None

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)
    with pytest.raises(TypeError):
        autompg_data()
def test_autompg_data_calls_genfromtxt(monkeypatch):
    """Check the file path and delimiter that ``autompg_data`` hands to ``np.genfromtxt``."""
    recorded = []

    def recording_genfromtxt(fname, delimiter):
        # Remember the arguments, then return two samples made of
        # two feature columns and one target column.
        recorded.append((fname, delimiter))
        return np.array([[1, 2, 3], [4, 5, 6]])

    monkeypatch.setattr(np, "genfromtxt", recording_genfromtxt)

    X, y = autompg_data()

    assert recorded, "np.genfromtxt was not called"
    path_used, delimiter_used = recorded[0]
    # Only the trailing directory/filename is checked, so the test does not
    # depend on where the package is installed.
    expected_suffix = os.path.join("data", "autompg.csv.gz")
    assert path_used.endswith(expected_suffix), "The file path used is incorrect."
    assert delimiter_used == ",", "The delimiter used is not a comma."

    # The stubbed data must be split into features and target as usual.
    np.testing.assert_array_equal(X, np.array([[1, 2], [4, 5]]))
    np.testing.assert_array_equal(y, np.array([3, 6]))
def test_autompg_data_with_nans(monkeypatch):
    """NaN entries in the loaded data must come through the split unchanged."""
    # Two samples, four columns, with one NaN in each row.
    with_gaps = np.array([
        [1.0, np.nan, 3.0, 4.0],
        [5.0, 6.0, np.nan, 8.0],
    ])

    def fake_genfromtxt(fname, delimiter):
        return with_gaps

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    np.testing.assert_array_equal(features, with_gaps[:, :-1])
    np.testing.assert_array_equal(target, with_gaps[:, -1])

def test_autompg_data_list_input(monkeypatch):
    """A plain nested list from the loader (not an ndarray) must lead to a TypeError."""
    # Ordinary Python lists, deliberately not wrapped in a NumPy array.
    nested_list = [[1, 2, 3], [4, 5, 6]]

    def fake_genfromtxt(fname, delimiter):
        return nested_list

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    with pytest.raises(TypeError):
        autompg_data()

def test_autompg_data_non_numeric(monkeypatch):
    """The feature/target split must also work when the loaded array holds strings."""
    letters = np.array([
        ["a", "b", "c"],
        ["d", "e", "f"],
    ])

    def fake_genfromtxt(fname, delimiter):
        return letters

    monkeypatch.setattr(np, "genfromtxt", fake_genfromtxt)

    features, target = autompg_data()

    np.testing.assert_array_equal(features, letters[:, :-1])
    np.testing.assert_array_equal(target, letters[:, -1])
Loading