cdisc-org · RamilCDISC · Dec 18, 2025 · Dec 15, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/core.py b/core.py
@@ -84,11 +84,20 @@ def cli():
     pass
 
 
-def _validate_data_directory(data: str, logger) -> tuple[list, set]:
+def _validate_data_directory(
+    data: str, logger, filetype: str = None
+) -> tuple[list, set]:
     """Validate data directory and return dataset paths and found formats."""
-    dataset_paths, found_formats = valid_data_file(
-        [str(p) for p in Path(data).rglob("*") if p.is_file()]
-    )
+    # Search the directory recursively for candidate files. When a file type
+    # is given, only files with that extension are collected; otherwise every
+    # file is passed to valid_data_file. A leading dot is stripped so that
+    # "json" and ".json" both produce the glob "*.json" rather than "*..json",
+    # which would only match names containing two consecutive dots.
+    extension = filetype.lstrip(".") if filetype else ""
+    pattern = f"*.{extension}" if extension else "*"
+    dataset_paths, found_formats = valid_data_file(
+        [str(p) for p in Path(data).rglob(pattern) if p.is_file()]
+    )
 
     if DataFormatTypes.XLSX.value in found_formats and len(found_formats) > 1:
         logger.error(
@@ -175,6 +184,13 @@ def _validate_no_arguments(logger) -> None:
     required=False,
     help=f"Path to directory containing data files ({VALIDATION_FORMATS_MESSAGE})",
 )
+@click.option(
+    "-ft",
+    "--filetype",
+    default=None,
+    required=False,
+    help="File extension to use for input files in the data directory (e.g., 'json', 'xpt', 'xlsx', 'ndjson')",
+)
 @click.option(
     "-dp",
     "--dataset-path",
@@ -352,6 +368,7 @@ def validate(
     cache: str,
     pool_size: int,
     data: str,
+    filetype: str,
     dataset_path: tuple[str],
     log_level: str,
     report_template: str,
@@ -435,7 +452,7 @@ def validate(
                 "Argument --dataset-path cannot be used together with argument --data"
             )
             ctx.exit(2)
-        dataset_paths, found_formats = _validate_data_directory(data, logger)
+        dataset_paths, found_formats = _validate_data_directory(data, logger, filetype)
         if dataset_paths is None:
             ctx.exit(2)
     elif dataset_path:

diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1442.py
@@ -0,0 +1,61 @@
+import os
+import subprocess
+import unittest
+import openpyxl
+import pytest
+from conftest import get_python_executable
+
+
+@pytest.mark.regression
+class TestCoreIssue1442(unittest.TestCase):
+    """Regression test for issue 1442: the ``-ft`` option of ``validate``."""
+
+    def test_positive_dataset(self):
+        """Validate a directory with ``-ft json`` and check the generated report."""
+        resource_dir = os.path.join("tests", "resources", "CoreIssue1442")
+        command = [
+            f"{get_python_executable()}",
+            "-m",
+            "core",
+            "validate",
+            "-s",
+            "usdm",
+            "-v",
+            "4-0",
+            "-d",
+            resource_dir,
+            "-ft",
+            "json",
+            "-lr",
+            os.path.join(resource_dir, "rule.yml"),
+        ]
+        subprocess.run(command, check=True)
+
+        # Pick the last CORE-Report file (sorted by name) from the working directory.
+        excel_files = sorted(
+            file
+            for file in os.listdir()
+            if file.startswith("CORE-Report-") and file.endswith(".xlsx")
+        )
+        assert excel_files, "No CORE-Report-*.xlsx file was generated."
+        excel_file_path = excel_files[-1]
+
+        try:
+            workbook = openpyxl.load_workbook(excel_file_path)
+            assert (
+                "Conformance Details" in workbook.sheetnames
+            ), "'Conformance Details' sheet not found in report."
+            conformance_sheet = workbook["Conformance Details"]
+            # Look for a label cell immediately followed by the expected value.
+            found = any(
+                cell == "JSON file name" and row[idx + 1] == "CDISC_Pilot_Study.json"
+                for row in conformance_sheet.iter_rows(min_row=2, values_only=True)
+                for idx, cell in enumerate(row[:-1])
+            )
+            assert (
+                found
+            ), "Pair ('JSON file name', 'CDISC_Pilot_Study.json') not found in any row of 'Conformance Details' sheet."
+        finally:
+            # Always remove the report so a failed run leaves no stale file behind.
+            if os.path.exists(excel_file_path):
+                os.remove(excel_file_path)