diff --git a/README.md b/README.md index 5f412d1..42a0107 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,11 @@ basic usage: ```bash review-tally -o expressjs -l javascript ``` + +To load settings from a configuration file instead of the CLI, pass the +`--config` option with a path to a TOML file. The file can contain any of the +available CLI options and replaces the `-o/--org` flag with an explicit list of +repositories to analyze. which would produce the following output @@ -162,6 +167,7 @@ review-tally -o expressjs -l javascript --plot-individual --save-plot reviewer_d ## Options * -o, --organization The Github organization that you want to query +* -c, --config Path to a TOML configuration file (overrides matching CLI options) * -l, --languages A comma separated list of languages that you want to include * -s, --start-date The start date for the time frame that you want to query (optional) * -e, --end-date The end date for the time frame that you want to query (optional) @@ -178,6 +184,26 @@ review-tally -o expressjs -l javascript --plot-individual --save-plot reviewer_d * --individual-chart-metric Metric to visualize in individual pie chart. Default: reviews. Available: reviews,comments,engagement_level,thoroughness_score,avg_response_time_hours,avg_completion_time_hours,active_review_days * --no-cache Disable PR review caching (always fetch fresh data from API). By default, caching is enabled for better performance. +## Using a configuration file + +You can capture preferred settings in a TOML configuration file and reuse them +with the `--config` option. + +```toml +repositories = ["expressjs/express", "expressjs/body-parser"] +start_date = "2024-01-01" +end_date = "2024-01-31" +languages = ["javascript", "typescript"] +metrics = ["reviews", "comments", "avg-comments"] +plot_sprint = true +save_plot = "reports/sprint.html" +``` + +Each entry in `repositories` must include both the owner and repository name in +the form `owner/repository`. Any value provided in the configuration file takes +precedence over the equivalent CLI flag. CLI-only runs remain supported by +supplying `-o/--org` as before. + ## GitHub API Rate Limiting This tool uses GitHub's REST and GraphQL APIs extensively to gather pull request and review data. GitHub enforces rate limits to ensure fair usage of their API resources. diff --git a/reviewtally/cli/parse_cmd_line.py b/reviewtally/cli/parse_cmd_line.py index 1f98a2f..cc0cbbf 100644 --- a/reviewtally/cli/parse_cmd_line.py +++ b/reviewtally/cli/parse_cmd_line.py @@ -8,7 +8,13 @@ import importlib.metadata import sys from datetime import datetime, timedelta, timezone -from typing import TypedDict +from pathlib import Path +from typing import Any, TypedDict + +if sys.version_info >= (3, 11): + import tomllib +else: # pragma: no cover - Python 3.10 fallback + import tomli as tomllib # type: ignore[import-not-found] from reviewtally.exceptions.local_exceptions import MalformedDateError @@ -16,7 +22,8 @@ class CommandLineArgs(TypedDict): """Type definition for cli arguments returned by parse_cmd_line.""" - org_name: str + org_name: str | None + repositories: list[str] | None start_date: datetime end_date: datetime languages: list[str] @@ -35,11 +42,64 @@ class CommandLineArgs(TypedDict): show_cache_stats: bool +class ConfigError(Exception): + """Raised when configuration file cannot be processed.""" + + def print_toml_version() -> None: version = importlib.metadata.version("review-tally") print(f"Current version is {version}") # noqa: T201 +def _load_config(path: str) -> dict[str, Any]: + config_path = Path(path) + if not config_path.is_file(): + msg = f"Error: Configuration file '{path}' not found" + raise ConfigError(msg) + try: + with config_path.open("rb") as file: + data = tomllib.load(file) + except tomllib.TOMLDecodeError as exc: # pragma: no cover + msg = f"Error parsing configuration file '{path}': {exc}" + raise ConfigError(msg) from exc + if not isinstance(data, dict): + msg = "Configuration file must contain a TOML table" + raise ConfigError(msg) + return data + + +def _split_list(value: str | list[str] | None) -> list[str]: + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, list): + result: list[str] = [] + for item in value: + if not isinstance(item, str): + msg = "Configuration list values must be strings" + raise ConfigError(msg) + if item.strip(): + result.append(item.strip()) + return result + msg = "Configuration values must be strings or lists of strings" + raise ConfigError(msg) + + +def _parse_repositories(value: str | list[str] | None) -> list[str]: + repositories = _split_list(value) + if not repositories: + msg = "Configuration must define at least one repository" + raise ConfigError(msg) + for repository in repositories: + if "/" not in repository: + msg = ( + "Repository entries must be in the format 'owner/repository'" + ) + raise ConfigError(msg) + return repositories + + def parse_cmd_line() -> CommandLineArgs: # noqa: C901, PLR0912, PLR0915 description = """Get pull requests for the organization between dates and the reviewers for each pull request. The environment must declare @@ -53,6 +113,12 @@ def parse_cmd_line() -> CommandLineArgs: # noqa: C901, PLR0912, PLR0915 mut_exc_plot_group = parser.add_mutually_exclusive_group() # these arguments are required parser.add_argument("-o", "--org", required=False, help=org_help) + parser.add_argument( + "-c", + "--config", + required=False, + help="Path to TOML configuration file", + ) date_format = "%Y-%m-%d" two_weeks_ago = datetime.now(tz=timezone.utc) - timedelta(days=14) today = datetime.now(tz=timezone.utc) @@ -184,22 +250,44 @@ def parse_cmd_line() -> CommandLineArgs: # noqa: C901, PLR0912, PLR0915 ) args = parser.parse_args() + + config_data: dict[str, Any] = {} + if args.config: + try: + config_data = _load_config(args.config) + except ConfigError as exc: + print(exc) # noqa: T201 + sys.exit(1) + parser_defaults = { + "metrics": parser.get_default("metrics"), + "chart_metrics": parser.get_default("chart_metrics"), + "chart_type": parser.get_default("chart_type"), + "language": parser.get_default("language"), + } # catch ValueError if the date format is not correct + start_date_source = ( + args.start_date + if args.start_date != parser.get_default("start_date") + else config_data.get("start_date", args.start_date) + ) try: - if args.start_date: - start_date = ( - datetime.strptime(args.start_date, "%Y-%m-%d") - ).replace(tzinfo=timezone.utc) + start_date = ( + datetime.strptime(start_date_source, "%Y-%m-%d") + ).replace(tzinfo=timezone.utc) except ValueError: - print(MalformedDateError(args.start_date)) # noqa: T201 + print(MalformedDateError(start_date_source)) # noqa: T201 sys.exit(1) + end_date_source = ( + args.end_date + if args.end_date != parser.get_default("end_date") + else config_data.get("end_date", args.end_date) + ) try: - if args.end_date: - end_date = (datetime.strptime(args.end_date, "%Y-%m-%d")).replace( - tzinfo=timezone.utc, - ) + end_date = (datetime.strptime(end_date_source, "%Y-%m-%d")).replace( + tzinfo=timezone.utc, + ) except ValueError: - print(MalformedDateError(args.end_date)) # noqa: T201 + print(MalformedDateError(end_date_source)) # noqa: T201 sys.exit(1) if args.version: print_toml_version() @@ -208,53 +296,135 @@ def parse_cmd_line() -> CommandLineArgs: # noqa: C901, PLR0912, PLR0915 print("Error: Start date must be before end date") # noqa: T201 sys.exit(1) # if the language arg has comma separated values, split them - if args.language is None: - languages = [] - elif args.language and "," in args.language: - languages = args.language.split(",") + language_source: str | list[str] | None + if args.language is not None: + language_source = args.language else: - languages = [args.language] + language_source = config_data.get( + "languages", + parser_defaults["language"], + ) + try: + languages = _split_list(language_source) + except ConfigError as exc: + print(exc) # noqa: T201 + sys.exit(1) # parse metrics argument - if args.metrics and "," in args.metrics: - metrics = args.metrics.split(",") + metrics_source: str | list[str] | None + if args.metrics != parser_defaults["metrics"]: + metrics_source = args.metrics else: - metrics = [args.metrics] + metrics_source = config_data.get("metrics", parser_defaults["metrics"]) + try: + metrics = _split_list(metrics_source) + except ConfigError as exc: + print(exc) # noqa: T201 + sys.exit(1) # parse chart metrics argument - if args.chart_metrics and "," in args.chart_metrics: - chart_metrics = args.chart_metrics.split(",") + chart_metrics_source: str | list[str] | None + if args.chart_metrics != parser_defaults["chart_metrics"]: + chart_metrics_source = args.chart_metrics else: - chart_metrics = [args.chart_metrics] - - if args.plot_sprint and args.individual_chart_metric: - print( # noqa: T201 - "Error: chart metrics and individual chart metric " - "are mutually exclusive.", + chart_metrics_source = config_data.get( + "chart_metrics", + parser_defaults["chart_metrics"], ) + try: + chart_metrics = _split_list(chart_metrics_source) + except ConfigError as exc: + print(exc) # noqa: T201 sys.exit(1) - if args.plot_individual and args.chart_metrics: + + plot_sprint = bool( + args.plot_sprint or config_data.get("plot_sprint", False), + ) + plot_individual = bool( + args.plot_individual or config_data.get("plot_individual", False), + ) + individual_chart_metric = ( + args.individual_chart_metric + if args.individual_chart_metric + != parser.get_default("individual_chart_metric") + else config_data.get( + "individual_chart_metric", + args.individual_chart_metric, + ) + ) + + if plot_sprint and plot_individual: print( # noqa: T201 - "Error: plot individual and chart metrics are mutually exclusive.", + "Error: plot sprint and plot individual options are mutually " + "exclusive.", ) sys.exit(1) + config_repositories: list[str] | None = None + if "repositories" in config_data: + try: + config_repositories = _parse_repositories( + config_data.get("repositories"), + ) + except ConfigError as exc: + print(exc) # noqa: T201 + sys.exit(1) + + org_name = args.org if args.org is not None else config_data.get("org") + if not org_name and not config_repositories: + print("Error: Provide an organization or repositories to analyze") # noqa: T201 + sys.exit(1) + + use_cache_config = config_data.get("use_cache") + if args.no_cache: + use_cache = False + elif use_cache_config is None: + use_cache = True + elif isinstance(use_cache_config, bool): + use_cache = use_cache_config + else: + print("Configuration value 'use_cache' must be a boolean") # noqa: T201 + sys.exit(1) + return CommandLineArgs( - org_name=args.org, + org_name=org_name, + repositories=config_repositories, start_date=start_date, end_date=end_date, languages=languages, metrics=metrics, - sprint_analysis=args.sprint_analysis, - output_path=args.output_path, - plot_sprint=args.plot_sprint, - chart_type=args.chart_type, + sprint_analysis=bool( + args.sprint_analysis + or config_data.get("sprint_analysis", False), + ), + output_path=( + args.output_path + if args.output_path is not None + else config_data.get("output_path") + ), + plot_sprint=plot_sprint, + chart_type=( + args.chart_type + if args.chart_type != parser_defaults["chart_type"] + else config_data.get("chart_type", args.chart_type) + ), chart_metrics=chart_metrics, - save_plot=args.save_plot, - plot_individual=args.plot_individual, - individual_chart_metric=args.individual_chart_metric, - use_cache=not args.no_cache, - clear_cache=args.clear_cache, - clear_expired_cache=args.clear_expired_cache, - show_cache_stats=args.cache_stats, + save_plot=( + args.save_plot + if args.save_plot is not None + else config_data.get("save_plot") + ), + plot_individual=plot_individual, + individual_chart_metric=individual_chart_metric, + use_cache=use_cache, + clear_cache=bool( + args.clear_cache or config_data.get("clear_cache", False), + ), + clear_expired_cache=bool( + args.clear_expired_cache + or config_data.get("clear_expired_cache", False), + ), + show_cache_stats=bool( + args.cache_stats or config_data.get("show_cache_stats", False), + ), ) diff --git a/reviewtally/data_collection.py b/reviewtally/data_collection.py index 8dbd315..f9a66db 100644 --- a/reviewtally/data_collection.py +++ b/reviewtally/data_collection.py @@ -44,7 +44,6 @@ class ReviewDataContext: class ProcessRepositoriesContext: """Context object for repository processing.""" - org_name: str repo_names: tqdm start_date: datetime end_date: datetime @@ -177,9 +176,13 @@ def process_repositories( for repo in context.repo_names: timestamped_print(f"Processing {repo}") + owner, _, repo_name = repo.partition("/") + if not owner or not repo_name: + msg = f"Invalid repository identifier '{repo}'" + raise ValueError(msg) pull_requests = get_pull_requests_between_dates( - context.org_name, - repo, + owner, + repo_name, context.start_date, context.end_date, use_cache=context.use_cache, @@ -190,11 +193,11 @@ def process_repositories( f"{len(pull_requests)} pull requests", ) context.repo_names.set_description( - f"Processing {context.org_name}/{repo}", + f"Processing {owner}/{repo_name}", ) review_context = ReviewDataContext( - org_name=context.org_name, - repo=repo, + org_name=owner, + repo=repo_name, pull_requests=pull_requests, reviewer_stats=reviewer_stats, sprint_stats=context.sprint_stats, diff --git a/reviewtally/main.py b/reviewtally/main.py index ccfdf2e..53ab535 100644 --- a/reviewtally/main.py +++ b/reviewtally/main.py @@ -48,13 +48,25 @@ def main() -> None: f"Calling get_repos_by_language {time.time() - start_time:.2f} " "seconds", ) - repo_list = get_repos(args["org_name"], args["languages"]) - if repo_list is None: - return - repo_names = tqdm(repo_list) + if args["repositories"]: + repo_identifiers = args["repositories"] + else: + org_name = args["org_name"] + if org_name is None: + timestamped_print( + "No organization provided and no repositories specified; " + "exiting.", + ) + return + repo_list = get_repos(org_name, args["languages"]) + if repo_list is None: + return + repo_identifiers = [f"{org_name}/{repo}" for repo in repo_list] + + repo_names = tqdm(repo_identifiers) timestamped_print( - f"Finished get_repos_by_language {time.time() - start_time:.2f} " - f"seconds for {len(repo_names)} repositories", + f"Prepared repository list {time.time() - start_time:.2f} " + f"seconds for {len(repo_identifiers)} repositories", ) if args["sprint_analysis"] or args["plot_sprint"]: @@ -75,7 +87,6 @@ def _handle_sprint_analysis( sprint_stats: dict[str, dict[str, Any]] = {} process_context = ProcessRepositoriesContext( - org_name=args["org_name"], repo_names=repo_names, start_date=args["start_date"], end_date=args["end_date"], @@ -97,7 +108,7 @@ def _handle_sprint_analysis( if args["plot_sprint"]: plotting_context = SprintPlottingContext( team_metrics=team_metrics, - org_name=args["org_name"], + org_name=args["org_name"] or "Selected Repositories", start_date=args["start_date"], end_date=args["end_date"], chart_type=args["chart_type"], @@ -114,7 +125,6 @@ def _handle_individual_analysis( ) -> None: """Handle individual reviewer analysis mode.""" process_context = ProcessRepositoriesContext( - org_name=args["org_name"], repo_names=repo_names, start_date=args["start_date"], end_date=args["end_date"], @@ -144,7 +154,7 @@ def _handle_individual_plotting( args["individual_chart_metric"], ) - org_name = args["org_name"] or "Organization" + org_name = args["org_name"] or "Selected Repositories" title = ( f"{metric_display_name} Distribution - {org_name} | " f"{args['start_date'].date()} to {args['end_date'].date()}" diff --git a/tests/cli/test_parse_cmd_line.py b/tests/cli/test_parse_cmd_line.py index 2de19fb..875b845 100644 --- a/tests/cli/test_parse_cmd_line.py +++ b/tests/cli/test_parse_cmd_line.py @@ -1,5 +1,8 @@ +import os +import tempfile import unittest from datetime import datetime, timezone +from pathlib import Path from typing import Any from unittest.mock import patch @@ -212,11 +215,12 @@ def test_valid_dates_no_exit(self, mock_argv: Any, mock_exit: Any) -> None: # Assert mock_exit.assert_not_called() self.assertIsInstance(result, dict) - # Check it's the right type (14 original + 3 cache options = 17) - self.assertEqual(len(result), 17) + # Check it's the right type (added repositories config key) + self.assertEqual(len(result), 18) # Verify the parsed dates self.assertEqual(result["org_name"], "test-org") + self.assertIsNone(result["repositories"]) self.assertEqual( result["start_date"], datetime(2023, 1, 1, tzinfo=timezone.utc), @@ -306,5 +310,204 @@ def test_malformed_date_error_message_format( self.assertIn("Please use the format YYYY-MM-DD", printed_error) +class TestParseCmdLineConfig(unittest.TestCase): + """Tests for TOML configuration loading.""" + + def _create_config_file(self, contents: str) -> str: + fd, path = tempfile.mkstemp(suffix=".toml") + path_obj = Path(path) + with os.fdopen(fd, "w", encoding="utf-8") as handle: + handle.write(contents) + + def cleanup() -> None: + if path_obj.exists(): + path_obj.unlink() + + self.addCleanup(cleanup) + return str(path_obj) + + @patch("sys.exit") + @patch("sys.argv") + def test_config_without_org_uses_repositories( + self, + mock_argv: Any, + mock_exit: Any, + ) -> None: + """Configuration file can define repositories without CLI org.""" + config_path = self._create_config_file( + """ +repositories = ["octocat/hello-world", "octocat/test-repo"] +start_date = "2023-01-01" +end_date = "2023-01-15" +metrics = ["reviews", "comments"] +""", + ) + + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "--config", + config_path, + ][x] + mock_argv.__len__.return_value = 3 + + result = parse_cmd_line() + + mock_exit.assert_not_called() + self.assertIsNone(result["org_name"]) + self.assertEqual( + result["repositories"], + ["octocat/hello-world", "octocat/test-repo"], + ) + self.assertEqual( + result["start_date"], + datetime(2023, 1, 1, tzinfo=timezone.utc), + ) + self.assertEqual( + result["metrics"], + ["reviews", "comments"], + ) + + @patch("sys.exit") + @patch("builtins.print") + @patch("sys.argv") + def test_invalid_repository_format_exits( + self, + mock_argv: Any, + mock_print: Any, + mock_exit: Any, + ) -> None: + """Invalid repository entries produce a clear error message.""" + config_path = self._create_config_file( + """ +repositories = ["invalidrepo"] +""", + ) + + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "--config", + config_path, + ][x] + mock_argv.__len__.return_value = 3 + mock_exit.side_effect = SystemExit + + with self.assertRaises(SystemExit): + parse_cmd_line() + + mock_print.assert_called_once() + error_message = str(mock_print.call_args[0][0]) + self.assertIn("owner/repository", error_message) + + @patch("sys.exit") + @patch("sys.argv") + def test_plot_sprint_flag_does_not_conflict( + self, + mock_argv: Any, + mock_exit: Any, + ) -> None: + """Sprint plotting flag should not trigger spurious conflicts.""" + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "-o", + "test-org", + "--plot-sprint", + ][x] + mock_argv.__len__.return_value = 4 + + result = parse_cmd_line() + + mock_exit.assert_not_called() + self.assertTrue(result["plot_sprint"]) + self.assertFalse(result["plot_individual"]) + + @patch("sys.exit") + @patch("sys.argv") + def test_plot_individual_flag_does_not_conflict( + self, + mock_argv: Any, + mock_exit: Any, + ) -> None: + """Individual plotting flag should not trigger spurious conflicts.""" + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "-o", + "test-org", + "--plot-individual", + ][x] + mock_argv.__len__.return_value = 4 + + result = parse_cmd_line() + + mock_exit.assert_not_called() + self.assertTrue(result["plot_individual"]) + self.assertFalse(result["plot_sprint"]) + + @patch("sys.exit") + @patch("builtins.print") + @patch("sys.argv") + def test_config_cannot_enable_both_plot_modes( + self, + mock_argv: Any, + mock_print: Any, + mock_exit: Any, + ) -> None: + """Configuration enabling both plot modes should exit with error.""" + config_path = self._create_config_file( + """ +plot_sprint = true +plot_individual = true +repositories = ["octocat/hello-world"] +start_date = "2023-01-01" +end_date = "2023-01-02" +""", + ) + + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "--config", + config_path, + ][x] + mock_argv.__len__.return_value = 3 + mock_exit.side_effect = SystemExit + + with self.assertRaises(SystemExit): + parse_cmd_line() + + mock_exit.assert_called_once_with(1) + mock_print.assert_called_once() + error_message = str(mock_print.call_args[0][0]) + self.assertIn("mutually exclusive", error_message) + + @patch("sys.exit") + @patch("sys.argv") + def test_cli_can_disable_cache_even_if_config_enables_it( + self, + mock_argv: Any, + mock_exit: Any, + ) -> None: + """The --no-cache flag should override a configuration value.""" + config_path = self._create_config_file( + """ +use_cache = true +repositories = ["octocat/hello-world"] +start_date = "2023-01-01" +end_date = "2023-01-02" +""", + ) + + mock_argv.__getitem__.side_effect = lambda x: [ + "review-tally", + "--config", + config_path, + "--no-cache", + ][x] + mock_argv.__len__.return_value = 4 + + result = parse_cmd_line() + + mock_exit.assert_not_called() + self.assertFalse(result["use_cache"]) + + if __name__ == "__main__": unittest.main()