#!/usr/bin/env python3
"""Unified test runner for ModelChecker theories and package components.

This script provides a comprehensive interface for running tests across:

- Theory examples (integration tests from examples.py files)
- Unit tests (component/implementation tests)
- Package tests (framework infrastructure tests)

The test runner uses smart detection to automatically determine whether targets
are theories or components, with explicit override options when needed.

Usage examples:
    ./run_tests.py                          # Run all tests
    ./run_tests.py --examples               # Run only example tests for all theories
    ./run_tests.py --unit                   # Run only unit tests for all targets
    ./run_tests.py --package                # Run only package tests for all components

    # Auto-detection (theories)
    ./run_tests.py logos                    # All test types for logos theory
    ./run_tests.py logos modal              # All test types for logos modal subtheory
    ./run_tests.py --examples logos modal   # Only example tests for logos modal

    # Auto-detection (components)
    ./run_tests.py iterate                  # All test types for iterate component
    ./run_tests.py iterate builder          # All test types for multiple components
    ./run_tests.py --unit iterate           # Only unit tests for iterate
    ./run_tests.py --package --unit iterate # Package and unit tests for iterate

    # Explicit type specification
    ./run_tests.py --theory parser          # Force targets as theories
    ./run_tests.py --component iterate      # Force targets as components
"""

import os
import sys
import argparse
import subprocess
import time
from dataclasses import dataclass
from typing import List, Dict, Optional, Union, Tuple
from pathlib import Path


@dataclass
class TestConfig:
    """Immutable test configuration."""

    theories: List[str]
    subtheories: Dict[str, List[str]]  # theory -> list of subtheories
    components: List[str]
    run_examples: bool
    run_unit: bool
    run_package: bool
    verbose: bool
    failfast: bool
    coverage: bool
    markers: List[str]
    pytest_args: List[str]
    force_theory: bool = False  # Force targets as theories
    force_component: bool = False  # Force targets as components
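
    # Illustrative only: the configuration that `./run_tests.py --examples logos modal`
    # would produce via from_args() below looks roughly like this; the exact values
    # depend on what is discovered on disk:
    #
    #   TestConfig(
    #       theories=['logos'], subtheories={'logos': ['modal']}, components=[],
    #       run_examples=True, run_unit=False, run_package=False,
    #       verbose=False, failfast=False, coverage=False,
    #       markers=[], pytest_args=[],
    #   )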

    @classmethod
    def from_args(cls, args, runner: 'TestRunner') -> 'TestConfig':
        """Create configuration from command line arguments."""
        # Check for conflicting flags
        if args.theory and args.component:
            raise ValueError("Cannot use both --theory and --component flags")

        # Handle deprecated --components flag
        if args.components:
            import warnings
            warnings.warn(
                "The --components flag is deprecated. Use positional arguments instead.",
                DeprecationWarning,
                stacklevel=2
            )
            # Merge components into targets
            args.targets = list(args.targets or []) + args.components
            args.component = True  # Force component mode

        # Detect target types
        theories = []
        components = []
        subtheories = {}

        if args.targets:
            # Use smart detection or explicit type
            target_type = runner._detect_target_types(args.targets, args.theory, args.component)
            if target_type == 'theories':
                theories, subtheories = runner._parse_theory_targets(args.targets)
            elif target_type == 'components':
                components = args.targets
            elif target_type == 'mixed':
                # Mixed mode when explicitly specified
                for target in args.targets:
                    if target in runner.theories:
                        theories.append(target)
                    elif target in runner.components:
                        components.append(target)
                    else:
                        raise ValueError(f"Unknown target: {target}")
        else:
            # No targets specified - determine defaults based on test types
            if not any([args.examples, args.unit, args.package]):
                # No flags - run all tests for all targets
                theories = runner.theories
                components = runner.components
            else:
                # Specific test types requested
                if args.examples:
                    theories = runner.theories  # Examples only apply to theories
                if args.unit:
                    theories = runner.theories if not components else theories
                    components = runner.components if not theories else components
                if args.package:
                    components = runner.components  # Package tests only apply to components

        # Determine what test types to run
        run_examples = args.examples or (not args.unit and not args.package)
        run_unit = args.unit or (not args.examples and not args.package)
        run_package = args.package or (not args.examples and not args.unit)

        config = cls(
            theories=theories,
            subtheories=subtheories,
            components=components,
            run_examples=run_examples,
            run_unit=run_unit,
            run_package=run_package,
            verbose=args.verbose,
            failfast=args.failfast,
            coverage=getattr(args, 'coverage', False),
            markers=getattr(args, 'markers', []),
            pytest_args=[],
            force_theory=args.theory,
            force_component=args.component
        )
        config.validate(runner)
        return config

    def validate(self, runner: 'TestRunner') -> None:
        """Validate configuration against available theories/components."""
        validator = TestConfigValidator()
        validator.validate_theories(self.theories, runner.theories)
        validator.validate_components(self.components, runner.components)
        for theory, subs in self.subtheories.items():
            validator.validate_subtheories(theory, subs, runner.subtheories)


class TestResults:
    """Tracks test execution results."""

    def __init__(self):
        self.theory_results: Dict[str, Dict[str, int]] = {}  # theory -> test_type -> exit_code
        self.component_results: Dict[str, int] = {}  # component -> exit_code
        self.overall_success = True
        self.subtheory_counts: Dict[str, Dict[str, int]] = {}  # theory -> subtheory -> count
        self.theory_timings: Dict[str, Dict[str, float]] = {}  # theory -> test_type -> time_taken
        self.subtheory_timings: Dict[str, Dict[str, float]] = {}  # theory -> subtheory -> time_taken

    def add_theory_result(self, theory: str, test_type: str, exit_code: int) -> None:
        """Add result for theory test execution."""
        if theory not in self.theory_results:
            self.theory_results[theory] = {}
        self.theory_results[theory][test_type] = exit_code
        if exit_code != 0:
            self.overall_success = False

    def add_component_result(self, component: str, exit_code: int) -> None:
        """Add result for component test execution."""
        self.component_results[component] = exit_code
        if exit_code != 0:
            self.overall_success = False

    def add_subtheory_count(self, theory: str, subtheory: str, count: int) -> None:
        """Add example count for a subtheory."""
        if theory not in self.subtheory_counts:
            self.subtheory_counts[theory] = {}
        self.subtheory_counts[theory][subtheory] = count

    def add_theory_timing(self, theory: str, test_type: str, time_taken: float) -> None:
        """Add timing for theory test execution."""
        if theory not in self.theory_timings:
            self.theory_timings[theory] = {}
        self.theory_timings[theory][test_type] = time_taken

    def add_subtheory_timing(self, theory: str, subtheory: str, time_taken: float) -> None:
        """Add timing for subtheory test execution."""
        if theory not in self.subtheory_timings:
            self.subtheory_timings[theory] = {}
        self.subtheory_timings[theory][subtheory] = time_taken

    def merge(self, other: 'TestResults') -> None:
        """Merge another TestResults into this one."""
        for theory, results in other.theory_results.items():
            if theory not in self.theory_results:
                self.theory_results[theory] = {}
            self.theory_results[theory].update(results)
        self.component_results.update(other.component_results)
        # Merge subtheory counts
        for theory, counts in other.subtheory_counts.items():
            if theory not in self.subtheory_counts:
                self.subtheory_counts[theory] = {}
            self.subtheory_counts[theory].update(counts)
        # Merge theory timings
        for theory, timings in other.theory_timings.items():
            if theory not in self.theory_timings:
                self.theory_timings[theory] = {}
            self.theory_timings[theory].update(timings)
        # Merge subtheory timings
        for theory, timings in other.subtheory_timings.items():
            if theory not in self.subtheory_timings:
                self.subtheory_timings[theory] = {}
            self.subtheory_timings[theory].update(timings)
        self.overall_success = self.overall_success and other.overall_success

    def get_exit_code(self) -> int:
        """Get overall exit code (0 for success, 1 for failures)."""
        return 0 if self.overall_success else 1

    def print_summary(self) -> None:
        """Print test execution summary."""
        print("\n" + "=" * 80)
        print("Test Summary:")

        # Theory results
        if self.theory_results:
            print("\nTheory Tests:")
            for theory, results in self.theory_results.items():
                for test_type, exit_code in results.items():
                    status = "PASSED" if exit_code == 0 else "FAILED"
                    print(f"  {theory} ({test_type}): {status}")
                    # Show timing info
                    if theory in self.theory_timings and test_type in self.theory_timings[theory]:
                        time_taken = self.theory_timings[theory][test_type]
                        print(f"    Time: {time_taken:.2f}s")
                    # Show example counts for all theories
                    if test_type == "examples" and theory in self.subtheory_counts:
                        if theory == "logos":
                            print("    Subtheory example counts:")
                            for subtheory, count in sorted(self.subtheory_counts[theory].items()):
                                timing_str = ""
                                if theory in self.subtheory_timings and subtheory in self.subtheory_timings[theory]:
                                    timing_str = f" ({self.subtheory_timings[theory][subtheory]:.2f}s)"
                                print(f"      {subtheory}: {count} examples{timing_str}")
                            total = sum(self.subtheory_counts[theory].values())
                            total_time = sum(self.subtheory_timings.get(theory, {}).values())
                            print(f"      Total: {total} examples ({total_time:.2f}s)")
                        else:
                            # For non-logos theories, just show the total
                            if "total" in self.subtheory_counts[theory]:
                                print(f"    Example count: {self.subtheory_counts[theory]['total']} examples")

        # Component results
        if self.component_results:
            print("\nPackage Tests:")
            for component, exit_code in self.component_results.items():
                status = "PASSED" if exit_code == 0 else "FAILED"
                print(f"  {component}: {status}")

        # Overall status
        overall_status = "SUCCESS: All tests passed!" if self.overall_success else "FAILED: Some tests failed"
        print(f"\n{overall_status}")


class TestConfigValidator:
    """Validates test configuration before execution."""

    def validate_theories(self, theories: List[str], available: List[str]) -> None:
        """Validate requested theories exist."""
        invalid = [t for t in theories if t not in available]
        if invalid:
            available_str = ', '.join(sorted(available))
            raise ValueError(f"Unknown theories: {invalid}. Available: {available_str}")

    def validate_subtheories(self, theory: str, subtheories: List[str],
                             available_subtheories: Dict[str, List[str]]) -> None:
        """Validate subtheories belong to theory."""
        if subtheories and theory not in available_subtheories:
            raise ValueError(f"Theory '{theory}' does not support subtheories")
        if subtheories:
            valid = available_subtheories[theory]
            invalid = [s for s in subtheories if s not in valid]
            if invalid:
                valid_str = ', '.join(sorted(valid))
                raise ValueError(f"Unknown subtheories for {theory}: {invalid}. Available: {valid_str}")

    def validate_components(self, components: List[str], available: List[str]) -> None:
        """Validate requested components exist."""
        invalid = [c for c in components if c not in available]
        if invalid:
            available_str = ', '.join(sorted(available))
            raise ValueError(f"Unknown components: {invalid}. Available: {available_str}")


class ExampleTestRunner:
    """Runs integration tests from examples.py files."""

    def __init__(self, code_dir: Path):
        self.code_dir = code_dir
        self.src_dir = code_dir / "src"

    def run_theory_examples(self, theory: str, subtheories: List[str], config: TestConfig,
                            results: Optional[TestResults] = None) -> int:
        """Run example tests for a specific theory with optional subtheory filtering."""
        if theory == 'logos':
            return self._run_logos_example_tests(subtheories, config, results)
        else:
            return self._run_standard_example_tests(theory, config, results)

    def _run_logos_example_tests(self, subtheories: List[str], config: TestConfig,
                                 results: Optional[TestResults] = None) -> int:
        """Run logos example tests from subtheory directories."""
        overall_exit_code = 0

        # Determine which subtheories to test
        target_subtheories = subtheories if subtheories else ['modal', 'counterfactual', 'extensional', 'constitutive', 'relevance']

        for subtheory in target_subtheories:
            subtheory_test_dir = self.src_dir / "model_checker" / "theory_lib" / "logos" / "subtheories" / subtheory / "tests"
            if not subtheory_test_dir.exists():
                print(f"  Warning: No tests found for logos {subtheory} subtheory")
                continue

            print(f"  Testing {subtheory} subtheory examples")

            # Count examples first if results object provided
            if results:
                example_count = self._count_subtheory_examples(subtheory)
                if example_count > 0:
                    results.add_subtheory_count("logos", subtheory, example_count)

            # Build command for subtheory examples
            command = ["pytest", str(subtheory_test_dir)]
            command.extend(["-k", "example"])  # Only example tests (matches both "examples" and "example_cases")
            if config.verbose:
                command.append("-v")
            if config.failfast:
                command.append("-x")

            # Execute tests and measure time
            env = self._setup_environment()
            start_time = time.time()
            try:
                result = subprocess.run(command, cwd=self.code_dir, env=env)
                elapsed_time = time.time() - start_time
                # Record timing if results object provided
                if results:
                    results.add_subtheory_timing("logos", subtheory, elapsed_time)
                if result.returncode != 0:
                    overall_exit_code = result.returncode
                    if config.failfast:
                        break
            except Exception as e:
                elapsed_time = time.time() - start_time
                if results:
                    results.add_subtheory_timing("logos", subtheory, elapsed_time)
                print(f"  Error running {subtheory} examples: {e}")
                overall_exit_code = 1
                if config.failfast:
                    break

        return overall_exit_code

    def _run_standard_example_tests(self, theory: str, config: TestConfig,
                                    results: Optional[TestResults] = None) -> int:
        """Run example tests for standard theories (non-logos)."""
        test_dir = self.src_dir / "model_checker" / "theory_lib" / theory / "tests"
        if not test_dir.exists():
            print(f"  Warning: No test directory found for {theory}")
            return 0

        # Count examples first if results object provided
        if results:
            example_count = self._count_theory_examples(theory)
            if example_count > 0:
                results.add_subtheory_count(theory, "total", example_count)

        # Build pytest command
        command = ["pytest", str(test_dir)]
        command.extend(["-k", "example"])  # Only example tests (matches both "examples" and "example_cases")
        if config.verbose:
            command.append("-v")
        if config.failfast:
            command.append("-x")

        # Set up environment and execute
        env = self._setup_environment()
        try:
            result = subprocess.run(command, cwd=self.code_dir, env=env)
            return result.returncode
        except Exception as e:
            print(f"  Error running example tests for {theory}: {e}")
            return 1
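
    # For reference, the command assembled above for e.g. the 'exclusion' theory
    # (paths relative to the repository's code directory) is roughly:
    #
    #   PYTHONPATH=src pytest src/model_checker/theory_lib/exclusion/tests -k example [-v] [-x]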

    def _count_subtheory_examples(self, subtheory: str) -> int:
        """Count the number of examples for a logos subtheory."""
        examples_file = self.src_dir / "model_checker" / "theory_lib" / "logos" / "subtheories" / subtheory / "examples.py"
        if not examples_file.exists():
            return 0

        # Count examples by looking for patterns like "XXX_TH_N_example" or "XXX_CM_N_example"
        count = 0
        try:
            with open(examples_file, 'r') as f:
                content = f.read()
            # Match patterns like MOD_TH_1_example, CF_CM_2_example, etc.
            import re
            pattern = r'^[A-Z]+_(?:TH|CM)_\d+_example\s*='
            matches = re.findall(pattern, content, re.MULTILINE)
            count = len(matches)
        except Exception:
            pass
        return count

    def _count_theory_examples(self, theory: str) -> int:
        """Count the number of examples for a standard theory."""
        examples_file = self.src_dir / "model_checker" / "theory_lib" / theory / "examples.py"
        if not examples_file.exists():
            return 0

        # Count examples by looking for patterns
        count = 0
        try:
            with open(examples_file, 'r') as f:
                content = f.read()
            # Match patterns - theories use different prefixes
            import re
            # Generic pattern to match various naming conventions
            patterns = [
                r'^[A-Z]+_(?:TH|CM)_\d+_example\s*=',  # Standard pattern like MOD_TH_1_example
                r'^[A-Za-z_]+_example_\d+\s*=',        # Pattern like exclusion_example_1
                r'^example_[A-Za-z_]+_\d+\s*=',        # Pattern like example_counterfactual_1
                r'^[A-Za-z]+Example\d+\s*=',           # Pattern like ImpositionExample1
            ]
            for pattern in patterns:
                matches = re.findall(pattern, content, re.MULTILINE)
                count += len(matches)
        except Exception:
            pass
        return count

    def _setup_environment(self) -> Dict[str, str]:
        """Set up environment for test execution."""
        env = os.environ.copy()
        env['PYTHONPATH'] = str(self.src_dir)
        return env


class UnitTestRunner:
    """Runs unit tests for theory implementations."""

    def __init__(self, code_dir: Path):
        self.code_dir = code_dir
        self.src_dir = code_dir / "src"

    def run_theory_units(self, theory: str, subtheories: List[str], config: TestConfig) -> int:
        """Run unit tests for a specific theory with optional subtheory filtering."""
        if theory == 'logos':
            return self._run_logos_unit_tests(subtheories, config)
        else:
            return self._run_standard_unit_tests(theory, config)

    def _run_logos_unit_tests(self, subtheories: List[str], config: TestConfig) -> int:
        """Run logos unit tests from main tests directory with subtheory filtering."""
        test_dir = self.src_dir / "model_checker" / "theory_lib" / "logos" / "tests"
        if not test_dir.exists():
            print("  Warning: No test directory found for logos")
            return 0

        # Build command for logos unit tests
        command = ["pytest", str(test_dir)]

        # Add subtheory filtering if specified
        if subtheories:
            # Build filter for specific subtheories
            subtheory_patterns = {
                'modal': '(modal or MOD_)',
                'counterfactual': '(counterfactual or CF_)',
                'extensional': '(extensional or EXT_)',
                'constitutive': '(constitutive or CON_ or CL_)',
                'relevance': '(relevance or REL_)'
            }
            patterns = [subtheory_patterns[sub] for sub in subtheories if sub in subtheory_patterns]
            if patterns:
                filter_expr = f"({' or '.join(patterns)}) and not example"
                command.extend(["-k", filter_expr])
            else:
                command.extend(["-k", "not example"])  # Just exclude examples
        else:
            command.extend(["-k", "not example"])  # Exclude example tests

        if config.verbose:
            command.append("-v")
        if config.failfast:
            command.append("-x")

        # Execute tests
        env = self._setup_environment()
        try:
            result = subprocess.run(command, cwd=self.code_dir, env=env)
            return result.returncode
        except Exception as e:
            print(f"  Error running unit tests for logos: {e}")
            return 1

    def _run_standard_unit_tests(self, theory: str, config: TestConfig) -> int:
        """Run unit tests for standard theories (non-logos)."""
        test_dir = self.src_dir / "model_checker" / "theory_lib" / theory / "tests"
        if not test_dir.exists():
            print(f"  Warning: No test directory found for {theory}")
            return 0

        # Build pytest command
        command = ["pytest", str(test_dir)]
        command.extend(["-k", "not example"])  # Exclude example tests
        if config.verbose:
            command.append("-v")
        if config.failfast:
            command.append("-x")

        # Set up environment and execute
        env = self._setup_environment()
        try:
            result = subprocess.run(command, cwd=self.code_dir, env=env)
            return result.returncode
        except Exception as e:
            print(f"  Error running unit tests for {theory}: {e}")
            return 1

    def _setup_environment(self) -> Dict[str, str]:
        """Set up environment for test execution."""
        env = os.environ.copy()
        env['PYTHONPATH'] = str(self.src_dir)
        return env


class PackageTestRunner:
    """Runs package/infrastructure component tests."""

    def __init__(self, code_dir: Path):
        self.code_dir = code_dir
        self.src_dir = code_dir / "src"

    def run_component_tests(self, component: str, config: TestConfig) -> int:
        """Run tests for a specific package component."""
        if component == "theory_lib":
            test_dir = self.src_dir / "model_checker" / "theory_lib" / "tests"
        else:
            test_dir = self.src_dir / "model_checker" / component / "tests"

        if not test_dir.exists():
            print(f"  Warning: No test directory found for {component}")
            return 0

        # Build pytest command
        command = self._build_pytest_command(test_dir, config)

        # Set up environment
        env = self._setup_environment()

        # Execute tests
        try:
            result = subprocess.run(command, cwd=self.code_dir, env=env)
            return result.returncode
        except Exception as e:
            print(f"  Error running package tests for {component}: {e}")
            return 1

    def _build_pytest_command(self, test_dir: Path, config: TestConfig) -> List[str]:
        """Build pytest command for package tests."""
        command = ["pytest", str(test_dir)]
        # Add standard pytest options
        if config.verbose:
            command.append("-v")
        if config.failfast:
            command.append("-x")
        return command

    def _setup_environment(self) -> Dict[str, str]:
        """Set up environment for test execution."""
        env = os.environ.copy()
        env['PYTHONPATH'] = str(self.src_dir)
        return env


class TestRunner:
    """Main test runner coordinating all test execution."""

    def __init__(self):
        self.code_dir = Path(__file__).parent
        self.theories = self._discover_theories()
        self.components = self._discover_components()
        self.subtheories = self._discover_subtheories()
        self.test_categories = ['examples', 'unit', 'package']

    def run(self, config: TestConfig) -> TestResults:
        """Execute tests based on configuration."""
        results = TestResults()

        # Print startup information
        self._print_startup_info(config)

        # Run requested test types
        if config.run_examples and config.theories:
            print(f"\nRunning example tests for theories: {', '.join(config.theories)}")
            example_results = self._run_example_tests(config)
            results.merge(example_results)

        if config.run_unit and config.theories:
            print(f"\nRunning unit tests for theories: {', '.join(config.theories)}")
            unit_results = self._run_unit_tests(config)
            results.merge(unit_results)

        if config.run_package and config.components:
            print(f"\nRunning package tests for components: {', '.join(config.components)}")
            package_results = self._run_package_tests(config)
            results.merge(package_results)

        return results

    def _discover_theories(self) -> List[str]:
        """Discover available theories."""
        theories = []
        theory_lib_dir = self.code_dir / "src" / "model_checker" / "theory_lib"
        if not theory_lib_dir.exists():
            return theories
        for item in theory_lib_dir.iterdir():
            if (item.is_dir() and
                    not item.name.startswith('__') and
                    item.name != 'bimodal' and  # Exclude bimodal theory (not finished)
                    (item / "tests").exists() and
                    (item / "examples.py").exists()):
                theories.append(item.name)
        return sorted(theories)

    def _discover_components(self) -> List[str]:
        """Discover available package components with test directories."""
        components = []
        src_dir = self.code_dir / "src" / "model_checker"
        if not src_dir.exists():
            return components
        for item in src_dir.iterdir():
            if (item.is_dir() and
                    not item.name.startswith('__') and
                    item.name != 'theory_lib' and  # Skip theory_lib (handled separately)
                    (item / "tests").exists()):
                components.append(item.name)
        # Add theory_lib itself (for infrastructure tests)
        theory_lib_tests = src_dir / "theory_lib" / "tests"
        if theory_lib_tests.exists():
            components.append("theory_lib")
        return sorted(components)

    def _discover_subtheories(self) -> Dict[str, List[str]]:
        """Discover subtheories for theories that support them."""
        # Currently only logos supports subtheories
        return {
            'logos': ['modal', 'counterfactual', 'extensional', 'constitutive', 'relevance']
        }
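
    # The discovery above assumes the repository layout used elsewhere in this
    # script, roughly:
    #
    #   src/model_checker/<component>/tests/                          -> package component
    #   src/model_checker/theory_lib/<theory>/tests/ + examples.py    -> theory
    #   src/model_checker/theory_lib/logos/subtheories/<sub>/tests/   -> logos subtheory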

    def _detect_target_types(self, targets: List[str], force_theory: bool, force_component: bool) -> str:
        """Detect whether targets are theories, components, or mixed.

        Args:
            targets: List of target names
            force_theory: Force all targets as theories
            force_component: Force all targets as components

        Returns:
            'theories' or 'components'. Mixed targets raise ValueError and must be
            disambiguated with --theory or --component.
        """
        if force_theory:
            return 'theories'
        if force_component:
            return 'components'

        # Auto-detect based on first target and check consistency
        first_target = targets[0]

        # Names that exist as both a theory and a component are ambiguous
        if first_target in self.theories and first_target in self.components:
            raise ValueError(
                f"'{first_target}' exists as both theory and component.\n"
                f"Use --theory or --component to specify which type."
            )

        # Check if it's a subtheory
        is_subtheory = any(first_target in subs for subs in self.subtheories.values())

        if first_target in self.theories or is_subtheory:
            target_type = 'theories'
            # Verify all targets are theories or subtheories
            for target in targets[1:]:
                if target not in self.theories and not any(target in subs for subs in self.subtheories.values()):
                    if target in self.components:
                        raise ValueError(
                            f"Mixed target types detected: '{first_target}' is a theory but '{target}' is a component.\n"
                            f"Use --theory to force all as theories or --component to force all as components."
                        )
                    else:
                        # Unknown target - will be caught by the theory parser
                        pass
        elif first_target in self.components:
            target_type = 'components'
            # Verify all targets are components
            for target in targets[1:]:
                if target in self.theories:
                    raise ValueError(
                        f"Mixed target types detected: '{first_target}' is a component but '{target}' is a theory.\n"
                        f"Use --component to force all as components or --theory to force all as theories."
                    )
                elif target not in self.components:
                    # Unknown target - will be caught by validation
                    pass
        else:
            # Unknown first target - give a helpful error listing everything we know about
            all_targets = sorted(set(self.theories + self.components +
                                     [sub for subs in self.subtheories.values() for sub in subs]))
            raise ValueError(f"Unknown target: {first_target}\nAvailable targets: {', '.join(all_targets)}")

        return target_type
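
    # Illustrative behaviour (actual names depend on what is discovered on disk):
    #
    #   _detect_target_types(['logos', 'modal'], False, False)      -> 'theories'
    #   _detect_target_types(['iterate', 'builder'], False, False)  -> 'components'
    #   _detect_target_types(['logos', 'iterate'], False, False)    -> ValueError (mixed)
    #   _detect_target_types(['logos', 'iterate'], True, False)     -> 'theories'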

    def _parse_theory_targets(self, targets: List[str]) -> tuple[List[str], Dict[str, List[str]]]:
        """Parse targets as theories and subtheories.

        Examples:
            ['logos'] -> (['logos'], {})
            ['logos', 'modal'] -> (['logos'], {'logos': ['modal']})
            ['logos', 'modal', 'counterfactual'] -> (['logos'], {'logos': ['modal', 'counterfactual']})
            ['exclusion', 'logos'] -> (['exclusion', 'logos'], {})
        """
        if not targets:
            return [], {}

        theories = []
        subtheories = {}

        i = 0
        while i < len(targets):
            target = targets[i]
            # Check if this is a theory
            if target in self.theories:
                theories.append(target)
                # Check if this theory supports subtheories and has them specified
                if target in self.subtheories:
                    theory_subtheories = []
                    # Look ahead for subtheories
                    j = i + 1
                    while j < len(targets) and targets[j] in self.subtheories[target]:
                        theory_subtheories.append(targets[j])
                        j += 1
                    if theory_subtheories:
                        subtheories[target] = theory_subtheories
                        i = j  # Skip the subtheories we just processed
                    else:
                        i += 1
                else:
                    i += 1
            else:
                # Unknown target
                all_theories = self.theories
                available_subtheories = [sub for subs in self.subtheories.values() for sub in subs]
                raise ValueError(
                    f"Unknown theory target: {target}\n"
                    f"Available theories: {', '.join(sorted(all_theories))}\n"
                    f"Available subtheories: {', '.join(sorted(available_subtheories))}"
                )

        return theories, subtheories

    def _print_startup_info(self, config: TestConfig) -> None:
        """Print information about what will be tested."""
        print("=" * 80)
        print("ModelChecker Unified Test Runner")
        print("=" * 80)

        test_types = []
        if config.run_examples:
            test_types.append("examples")
        if config.run_unit:
            test_types.append("unit")
        if config.run_package:
            test_types.append("package")
        print(f"Test types: {', '.join(test_types)}")

        if config.theories:
            print(f"Theories: {', '.join(config.theories)}")
            for theory, subs in config.subtheories.items():
                if subs:
                    print(f"  {theory} subtheories: {', '.join(subs)}")
        if config.components:
            print(f"Components: {', '.join(config.components)}")

    def _run_example_tests(self, config: TestConfig) -> TestResults:
        """Run example tests for specified theories/subtheories."""
        results = TestResults()
        example_runner = ExampleTestRunner(self.code_dir)

        for theory in config.theories:
            subtheories = config.subtheories.get(theory, [])
            print(f"  Running example tests for {theory}")
            if subtheories:
                print(f"    Subtheories: {', '.join(subtheories)}")

            start_time = time.time()
            exit_code = example_runner.run_theory_examples(theory, subtheories, config, results)
            elapsed_time = time.time() - start_time

            results.add_theory_result(theory, 'examples', exit_code)
            results.add_theory_timing(theory, 'examples', elapsed_time)

            if exit_code != 0 and config.failfast:
                break

        return results

    def _run_unit_tests(self, config: TestConfig) -> TestResults:
        """Run unit tests for specified theories/subtheories."""
        results = TestResults()
        unit_runner = UnitTestRunner(self.code_dir)

        for theory in config.theories:
            subtheories = config.subtheories.get(theory, [])
            print(f"  Running unit tests for {theory}")
            if subtheories:
                print(f"    Subtheories: {', '.join(subtheories)}")

            start_time = time.time()
            exit_code = unit_runner.run_theory_units(theory, subtheories, config)
            elapsed_time = time.time() - start_time

            results.add_theory_result(theory, 'unit', exit_code)
            results.add_theory_timing(theory, 'unit', elapsed_time)

            if exit_code != 0 and config.failfast:
                break

        return results

    def _run_package_tests(self, config: TestConfig) -> TestResults:
        """Run package tests for specified components."""
        results = TestResults()
        package_runner = PackageTestRunner(self.code_dir)

        for component in config.components:
            print(f"  Running package tests for {component}")
            exit_code = package_runner.run_component_tests(component, config)
            results.add_component_result(component, exit_code)

            if exit_code != 0 and config.failfast:
                break

        return results


def create_argument_parser() -> argparse.ArgumentParser:
    """Create command line argument parser."""
    parser = argparse.ArgumentParser(
        description="Unified test runner for ModelChecker theories and components",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run all tests
  %(prog)s                           Run all tests (examples + unit + package)
  %(prog)s --examples                Run only example tests for all theories
  %(prog)s --unit                    Run only unit tests for all targets
  %(prog)s --package                 Run only package tests for all components

  # Auto-detection (theories)
  %(prog)s logos                     All test types for logos theory
  %(prog)s logos modal               All test types for logos modal subtheory
  %(prog)s --examples logos modal    Only example tests for logos modal
  %(prog)s exclusion logos           All test types for multiple theories

  # Auto-detection (components)
  %(prog)s iterate                   All test types for iterate component
  %(prog)s iterate builder           All test types for multiple components
  %(prog)s --unit iterate            Only unit tests for iterate
  %(prog)s --package --unit iterate  Package and unit tests for iterate

  # Explicit type specification
  %(prog)s --theory parser           Force targets as theories
  %(prog)s --component iterate       Force targets as components
"""
    )

    # Test type selection
    test_group = parser.add_argument_group("Test Type Selection")
    test_group.add_argument(
        "--examples",
        action="store_true",
        help="Run only example tests (integration tests from examples.py)"
    )
    test_group.add_argument(
        "--unit",
        action="store_true",
        help="Run only unit tests (component/implementation tests)"
    )
    test_group.add_argument(
        "--package",
        action="store_true",
        help="Run only package tests (framework infrastructure)"
    )

    # Target selection
    parser.add_argument(
        "targets",
        nargs="*",
        help="Targets to test (theories, subtheories, or components)"
    )

    # Target type specification
    target_group = parser.add_argument_group("Target Type Specification")
    target_group.add_argument(
        "--theory",
        action="store_true",
        help="Force targets to be interpreted as theories"
    )
    target_group.add_argument(
        "--component",
        action="store_true",
        help="Force targets to be interpreted as components"
    )

    # Deprecated
    parser.add_argument(
        "--components",
        nargs="+",
        help=argparse.SUPPRESS  # Hide deprecated option
    )

    # Standard options
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output"
    )
    parser.add_argument(
        "--failfast", "-x",
        action="store_true",
        help="Stop after first failure"
    )

    return parser
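

# A minimal sketch of driving the runner programmatically (roughly equivalent to
# `./run_tests.py --examples logos`; the argument list is illustrative only):
#
#   runner = TestRunner()
#   args = create_argument_parser().parse_args(['--examples', 'logos'])
#   config = TestConfig.from_args(args, runner)
#   results = runner.run(config)
#   results.print_summary()
#   sys.exit(results.get_exit_code())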


def main():
    """Main entry point."""
    parser = create_argument_parser()
    args = parser.parse_args()

    try:
        # Create test runner and configuration
        runner = TestRunner()

        # Show available targets if none specified and no test type flags
        if not args.targets and not any([args.examples, args.unit, args.package]):