Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,20 @@ def cli():
pass


def _validate_data_directory(data: str, logger) -> tuple[list, set]:
def _validate_data_directory(
data: str, logger, filetype: str = None
) -> tuple[list, set]:
"""Validate data directory and return dataset paths and found formats."""
dataset_paths, found_formats = valid_data_file(
[str(p) for p in Path(data).rglob("*") if p.is_file()]
)
# Restrict file discovery to the requested extension when a filetype is given; otherwise scan every file.
if filetype:
pattern = f"*.{filetype}"
dataset_paths, found_formats = valid_data_file(
[str(p) for p in Path(data).rglob(pattern) if p.is_file()]
)
else:
dataset_paths, found_formats = valid_data_file(
[str(p) for p in Path(data).rglob("*") if p.is_file()]
)

if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1:
logger.error(
Expand Down Expand Up @@ -175,6 +184,13 @@ def _validate_no_arguments(logger) -> None:
required=False,
help=f"Path to directory containing data files ({VALIDATION_FORMATS_MESSAGE})",
)
@click.option(
"-ft",
"--filetype",
default=None,
required=False,
help="File extension to use for input files in the data directory (e.g., 'json', 'xpt', 'xlsx', 'ndjson')",
)
@click.option(
"-dp",
"--dataset-path",
Expand Down Expand Up @@ -352,6 +368,7 @@ def validate(
cache: str,
pool_size: int,
data: str,
filetype: str,
dataset_path: tuple[str],
log_level: str,
report_template: str,
Expand Down Expand Up @@ -435,7 +452,7 @@ def validate(
"Argument --dataset-path cannot be used together with argument --data"
)
ctx.exit(2)
dataset_paths, found_formats = _validate_data_directory(data, logger)
dataset_paths, found_formats = _validate_data_directory(data, logger, filetype)
if dataset_paths is None:
ctx.exit(2)
elif dataset_path:
Expand Down
61 changes: 61 additions & 0 deletions tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import subprocess
import unittest
import openpyxl
import pytest
from conftest import get_python_executable


@pytest.mark.regression
class TestCoreIssue1442(unittest.TestCase):
    """Regression test for the ``-ft`` option of the ``validate`` command."""

    @staticmethod
    def _row_has_adjacent_pair(row, label, value) -> bool:
        """Return True when *label* is immediately followed by *value* in *row*."""
        return any(
            left == label and right == value for left, right in zip(row, row[1:])
        )

    def test_positive_dataset(self):
        """Run ``validate`` with ``-ft json`` and inspect the generated report.

        The test passes when the "Conformance Details" sheet of the newest
        ``CORE-Report-*.xlsx`` file in the working directory contains a row
        where the cell "JSON file name" is directly followed by
        "CDISC_Pilot_Study.json".
        """
        resources_dir = os.path.join("tests", "resources", "CoreIssue1442")
        command = [
            f"{get_python_executable()}",
            "-m",
            "core",
            "validate",
            "-s",
            "usdm",
            "-v",
            "4-0",
            "-d",
            resources_dir,
            "-ft",
            "json",
            "-lr",
            os.path.join(resources_dir, "rule.yml"),
        ]
        subprocess.run(command, check=True)

        # Report names sort lexicographically, so the last one is taken as
        # the most recently created report.
        excel_files = [
            file
            for file in os.listdir()
            if file.startswith("CORE-Report-") and file.endswith(".xlsx")
        ]
        # Fail with a readable message instead of an IndexError below.
        self.assertTrue(
            excel_files, "No 'CORE-Report-*.xlsx' file found after validation run."
        )
        excel_file_path = sorted(excel_files)[-1]

        try:
            workbook = openpyxl.load_workbook(excel_file_path)
            try:
                self.assertIn(
                    "Conformance Details",
                    workbook.sheetnames,
                    "'Conformance Details' sheet not found in report.",
                )
                conformance_sheet = workbook["Conformance Details"]
                found = any(
                    self._row_has_adjacent_pair(
                        row, "JSON file name", "CDISC_Pilot_Study.json"
                    )
                    for row in conformance_sheet.iter_rows(
                        min_row=2, values_only=True
                    )
                )
                self.assertTrue(
                    found,
                    "Pair ('JSON file name', 'CDISC_Pilot_Study.json') not found in any row of 'Conformance Details' sheet.",
                )
            finally:
                workbook.close()
        finally:
            # Always remove the report, even when an assertion fails, so a
            # stale file cannot be picked up by a later run.
            if os.path.exists(excel_file_path):
                os.remove(excel_file_path)
Loading