diff --git a/core/Db/Schema.php b/core/Db/Schema.php index a222eee65c8..1d57342882a 100644 --- a/core/Db/Schema.php +++ b/core/Db/Schema.php @@ -313,6 +313,16 @@ public function supportsSortingInSubquery(): bool return $this->getSchema()->supportsSortingInSubquery(); } + /** + * Returns if the database engine supports window functions. + * + * @return bool + */ + public function supportsWindowFunctions(): bool + { + return $this->getSchema()->supportsWindowFunctions(); + } + /** * Returns the supported read isolation transaction level * diff --git a/core/Db/Schema/Mariadb.php b/core/Db/Schema/Mariadb.php index 6d2ce0c1a30..7027f709944 100644 --- a/core/Db/Schema/Mariadb.php +++ b/core/Db/Schema/Mariadb.php @@ -57,6 +57,13 @@ public function supportsRankingRollupWithoutExtraSorting(): bool return false; } + public function supportsWindowFunctions(): bool + { + $version = strtolower($this->getVersion()); + + return version_compare($version, '10.2', '>='); + } + public function hasReachedEOL(): bool { $currentVersion = $this->getVersion(); diff --git a/core/Db/Schema/Mysql.php b/core/Db/Schema/Mysql.php index 243749deb7f..eb964116687 100644 --- a/core/Db/Schema/Mysql.php +++ b/core/Db/Schema/Mysql.php @@ -31,6 +31,11 @@ class Mysql implements SchemaInterface public const OPTION_NAME_MATOMO_INSTALL_VERSION = 'install_version'; public const MAX_TABLE_NAME_LENGTH = 64; + /** + * @var string|null + */ + private $databaseVersion = null; + private $tablesInstalled = null; public function getDatabaseType(): string @@ -786,6 +791,18 @@ public function supportsSortingInSubquery(): bool return true; } + public function supportsWindowFunctions(): bool + { + $version = strtolower($this->getVersion()); + + // If MySQL is configured but MariaDb used don't take chances + if (str_contains($version, 'mariadb')) { + return false; + } + + return version_compare($version, '8.0', '>='); + } + public function getSupportedReadIsolationTransactionLevel(): string { return 'READ UNCOMMITTED'; @@ -871,7 +888,11 @@ private function getTablePrefix() public function getVersion(): string { - return Db::fetchOne("SELECT VERSION()"); + if (null === $this->databaseVersion) { + $this->databaseVersion = Db::fetchOne("SELECT VERSION()"); + } + + return $this->databaseVersion; } protected function getTableStatus() diff --git a/core/Db/Schema/Tidb.php b/core/Db/Schema/Tidb.php index 6962f9c2268..7d0b85fce2e 100644 --- a/core/Db/Schema/Tidb.php +++ b/core/Db/Schema/Tidb.php @@ -88,6 +88,11 @@ public function supportsSortingInSubquery(): bool return false; } + public function supportsWindowFunctions(): bool + { + return true; + } + public function getSupportedReadIsolationTransactionLevel(): string { // TiDB doesn't support READ UNCOMMITTED diff --git a/core/Db/SchemaInterface.php b/core/Db/SchemaInterface.php index f3bb864f573..4e16a79ff3a 100644 --- a/core/Db/SchemaInterface.php +++ b/core/Db/SchemaInterface.php @@ -198,6 +198,13 @@ public function supportsRankingRollupWithoutExtraSorting(): bool; */ public function supportsSortingInSubquery(): bool; + /** + * Returns if the database engine supports window functions. + * + * @return bool + */ + public function supportsWindowFunctions(): bool; + /** * Returns the version of the database server * @return string diff --git a/core/DbHelper.php b/core/DbHelper.php index 08221adab25..48c72495074 100644 --- a/core/DbHelper.php +++ b/core/DbHelper.php @@ -430,26 +430,59 @@ public static function addOptimizerHintToQuery(string $sql, string $hint): strin } /** - * Extracts the "ORDER BY" clause from a query. + * Extracts the "GROUP BY" clause from a query. * * Will return null if no clause found or the extraction failed, * e.g. parentheses in the extracted clause are not balanced. */ - public static function extractOrderByFromQuery(string $sql): ?string + public static function extractGroupByFromQuery(string $sql, bool $stripTableNames = false): ?string { - $pattern = '/.*ORDER\s+BY\s+(.*?)(?:\s+LIMIT|\s*;|\s*$)/is'; + $groupBy = self::extractClauseFromQuery( + $sql, + '/.*GROUP\s+BY\s+(.*?)(?:\s+(?:WITH|HAVING|WINDOW|ORDER|LIMIT)|\s*;|\s*$)(.*)/is' + ); - if (preg_match($pattern, $sql, $matches)) { - $orderBy = $matches[1]; - $openParentheses = substr_count($orderBy, '('); - $closeParentheses = substr_count($orderBy, ')'); + if ($stripTableNames && null !== $groupBy) { + return self::stripTableNamesFromQueryClause($groupBy); + } else { + return $groupBy; + } + } - if ($openParentheses === $closeParentheses) { - return trim($orderBy); - } + /** + * Extracts the "ORDER BY" clause from a query. + * + * Will return null if no clause found or the extraction failed, + * e.g. parentheses in the extracted clause are not balanced. + */ + public static function extractOrderByFromQuery(string $sql, bool $stripTableNames = false): ?string + { + $orderBy = self::extractClauseFromQuery( + $sql, + '/.*ORDER\s+BY\s+(.*?)(?:\s+LIMIT|\s*;|\s*$)(.*)/is' + ); + + if ($stripTableNames && null !== $orderBy) { + return self::stripTableNamesFromQueryClause($orderBy); + } else { + return $orderBy; } + } - return null; + /** + * Extracts the "SELECT" columns from a query. + * + * Will return null if no columns found or the extraction failed. + * + * Will skip comments and optimizer hints between the SELECT and the + * first column, but not between individual columns. + */ + public static function extractSelectFromQuery(string $sql): ?string + { + return self::extractClauseFromQuery( + $sql, + '/^\s*SELECT\s+(?:\/\*.*?\*\/\s*)*(.*?)(?:\s+FROM|\s*;|\s*$)/is' + ); } /** @@ -465,4 +498,47 @@ public static function isValidDbname($dbname) { return (0 !== preg_match('/(^[a-zA-Z0-9]+([a-zA-Z0-9\_\.\-\+]*))$/D', $dbname)); } + + private static function extractClauseFromQuery(string $query, string $pattern): ?string + { + preg_match($pattern, $query, $matches); + + if (empty($matches[1])) { + return null; + } + + $clause = trim($matches[1]); + $openParentheses = substr_count($clause, '('); + $closeParentheses = substr_count($clause, ')'); + + if ($openParentheses !== $closeParentheses) { + return null; + } + + // secondary match is after optional keywords + // check for balanced parentheses to avoid matching + // clause from a nested query + if (!empty($matches[2])) { + $postMatch = $matches[2]; + $openParentheses = substr_count($postMatch, '('); + $closeParentheses = substr_count($postMatch, ')'); + + if ($openParentheses !== $closeParentheses) { + return null; + } + } + + return $clause; + } + + private static function stripTableNamesFromQueryClause(string $clause): string + { + return preg_replace_callback( + '/`?\w+`?\.`?(\w+)`?/', + function (array $matches): string { + return '`' . $matches[1] . '`'; + }, + $clause + ); + } } diff --git a/core/RankingQuery.php b/core/RankingQuery.php index a7acb46bf0b..76705e9f9b6 100644 --- a/core/RankingQuery.php +++ b/core/RankingQuery.php @@ -49,43 +49,50 @@ class RankingQuery /** * Contains the labels of the inner query. * Format: "label" => true (to make sure labels don't appear twice) - * @var array + * + * @var array */ - private $labelColumns = array(); + private $labelColumns = []; /** * The columns of the inner query that are not labels * Format: "label" => "aggregation function" or false for no aggregation - * @var array + * + * @var array */ - private $additionalColumns = array(); + private $additionalColumns = []; /** * The limit for each group + * * @var int */ private $limit = 5; /** * The name of the columns that marks rows to be excluded from the limit - * @var string + * + * @var string|false */ private $columnToMarkExcludedRows = false; /** * The column that is used to partition the result - * @var bool|string + * + * @var string|false */ private $partitionColumn = false; /** * The possible values for the column $this->partitionColumn - * @var array + * + * @var array */ - private $partitionColumnValues = array(); + private $partitionColumnValues = []; /** * The value to use in the label of the 'Others' row. + * * @var string */ private $othersLabelValue = self::LABEL_SUMMARY_ROW; @@ -107,7 +114,7 @@ public function __construct($limit = false) * * @param int $limit */ - public function setLimit($limit) + public function setLimit(int $limit): void { $this->limit = $limit; } @@ -117,7 +124,7 @@ public function setLimit($limit) * * @param string $value */ - public function setOthersLabel($value) + public function setOthersLabel(string $value): void { $this->othersLabelValue = $value; } @@ -126,23 +133,25 @@ public function setOthersLabel($value) * Add a label column. * Labels are the columns that are replaced with "Others" after the limit. * - * @param string|array $labelColumn + * @param string|array $labelColumn */ - public function addLabelColumn($labelColumn) + public function addLabelColumn($labelColumn): void { if (is_array($labelColumn)) { foreach ($labelColumn as $label) { $this->addLabelColumn($label); } + return; } + $this->labelColumns[$labelColumn] = true; } /** - * @return array + * @return array */ - public function getLabelColumns() + public function getLabelColumns(): array { return $this->labelColumns; } @@ -150,18 +159,20 @@ public function getLabelColumns() /** * Add a column that has be added to the outer queries. * - * @param $column - * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others", - * eg, `'min'`, `'max'` or `'sum'`. + * @param int|string|array $column + * @param string|false $aggregationFunction If set, this function is used to aggregate the values of "Others", + * eg, `'min'`, `'max'` or `'sum'`. */ - public function addColumn($column, $aggregationFunction = false) + public function addColumn($column, $aggregationFunction = false): void { if (is_array($column)) { foreach ($column as $c) { $this->addColumn($c, $aggregationFunction); } + return; } + $this->additionalColumns[$column] = $aggregationFunction; } @@ -171,10 +182,10 @@ public function addColumn($column, $aggregationFunction = false) * into another array. Both the result and the array of excluded rows are returned * by {@link execute()}. * - * @param $column string Name of the column. + * @param string $column Name of the column. * @throws Exception if method is used more than once. */ - public function setColumnToMarkExcludedRows($column) + public function setColumnToMarkExcludedRows(string $column): void { if ($this->columnToMarkExcludedRows !== false) { throw new Exception("setColumnToMarkExcludedRows can only be used once"); @@ -198,11 +209,11 @@ public function setColumnToMarkExcludedRows($column) * where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for * rows `log_action.type = TYPE_DOWNLOAD`. * - * @param $partitionColumn string The column name to partition by. - * @param $possibleValues Array of possible column values. + * @param string $partitionColumn The column name to partition by. + * @param array $possibleValues Array of possible column values. * @throws Exception if method is used more than once. */ - public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues) + public function partitionResultIntoMultipleGroups(string $partitionColumn, array $possibleValues): void { if ($this->partitionColumn !== false) { throw new Exception("partitionResultIntoMultipleGroups can only be used once"); @@ -217,16 +228,16 @@ public function partitionResultIntoMultipleGroups($partitionColumn, $possibleVal * Executes the query. * The object has to be configured first using the other methods. * - * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering - * has to be specified in this query. {@link RankingQuery} cannot apply ordering - * itself. - * @param $bind array Bindings for the inner query. - * @param int $timeLimit Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimit > 0 - * for more details see {@link DbHelper::addMaxExecutionTimeHintToQuery} - * @return array The format depends on which methods have been used - * to configure the ranking query. + * @param string $innerQuery The "payload" query that does the actual data aggregation. The ordering + * has to be specified in this query. {@link RankingQuery} cannot apply ordering + * itself. + * @param array $bind Bindings for the inner query. + * @param int $timeLimit Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimit > 0 + * for more details see {@link DbHelper::addMaxExecutionTimeHintToQuery} + * @return array The format depends on which methods have been used + * to configure the ranking query. */ - public function execute($innerQuery, $bind = array(), $timeLimit = 0) + public function execute(string $innerQuery, array $bind, int $timeLimit = 0): array { $query = $this->generateRankingQuery($innerQuery); $query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimit); @@ -235,8 +246,9 @@ public function execute($innerQuery, $bind = array(), $timeLimit = 0) if ($this->columnToMarkExcludedRows !== false) { // split the result into the regular result and the rows with special treatment - $excludedFromLimit = array(); - $result = array(); + $excludedFromLimit = []; + $result = []; + foreach ($data as &$row) { if ($row[$this->columnToMarkExcludedRows] != 0) { $excludedFromLimit[] = $row; @@ -244,10 +256,11 @@ public function execute($innerQuery, $bind = array(), $timeLimit = 0) $result[] = $row; } } - $data = array( + + $data = [ 'result' => &$result, 'excludedFromLimit' => &$excludedFromLimit - ); + ]; } if ($this->partitionColumn !== false) { @@ -261,16 +274,25 @@ public function execute($innerQuery, $bind = array(), $timeLimit = 0) return $data; } - private function splitPartitions(&$data) + /** + * @param array> $data + * + * @return array>> + */ + private function splitPartitions(array &$data): array { - $result = array(); + $result = []; + foreach ($data as &$row) { $partition = $row[$this->partitionColumn]; + if (!isset($result[$partition])) { - $result[$partition] = array(); + $result[$partition] = []; } - $result[$partition][] = & $row; + + $result[$partition][] = &$row; } + return $result; } @@ -279,40 +301,124 @@ private function splitPartitions(&$data) * If you want to get the result, use execute() instead. If you want to run the query * yourself, use this method. * - * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering - * has to be specified in this query. {@link RankingQuery} cannot apply ordering - * itself. - * @param $withRollup bool A flag which determines whether to generate the SQL query using ROLLUP - * @return string The entire ranking query SQL. + * @param string $innerQuery The "payload" query that does the actual data aggregation. The ordering + * has to be specified in this query. {@link RankingQuery} cannot apply ordering + * itself. + * @param bool $withRollup A flag which determines whether to generate the SQL query using ROLLUP + * @return string The entire ranking query SQL. */ - public function generateRankingQuery($innerQuery, bool $withRollup = false) + public function generateRankingQuery(string $innerQuery, bool $withRollup = false): string { // +1 to include "Others" $limit = $this->limit + 1; - $counterExpression = $this->getCounterExpression($limit, $withRollup); - // generate select clauses for label columns - $labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`'; - $labelColumnsOthersSwitch = array(); - $withRollupColumns = array(); + $labelColumnsString = $this->generateLabelColumnsString(); + $labelColumnsOthersSwitch = $this->generateLabelColumnsOthersSwitch($limit, $withRollup); + $additionalColumnsExpressions = $this->generateAdditionalColumnsExpressions(); + + if (Schema::getInstance()->supportsWindowFunctions()) { + $windowOrderBy = $this->generateWindowOrderByExpression($innerQuery, $withRollup); + + $counterExpressions = $this->generateWindowCounterExpressions($windowOrderBy, $withRollup); + $counterRollupExpressions = $this->generateWindowCounterRollupExpressions($withRollup, $windowOrderBy); + $groupByExpression = $this->generateWindowGroupByExpression($limit, $withRollup); + } else { + $counterExpressions = $this->generateVariableCounterExpressions($limit, $withRollup); + $counterRollupExpressions = $this->generateVariableCounterRollupExpressions($limit, $withRollup); + $groupByExpression = $this->generateVariableGroupByExpression($withRollup); + } + + $innerQuery = $this->prepareInnerQuery($innerQuery); + $withCounterQuery = $this->prepareWithCounterQuery( + $innerQuery, + $withRollup, + $labelColumnsString, + $counterExpressions, + $counterRollupExpressions, + $additionalColumnsExpressions + ); + + $rankingSelectString = implode( + ', + ', + array_filter([ + $labelColumnsOthersSwitch, + $additionalColumnsExpressions['additionalColumnsAggregated'], + ]) + ); + + $rankingQuery = " + SELECT + $rankingSelectString + FROM ( $withCounterQuery ) AS withCounter + GROUP BY $groupByExpression + "; + + if (!Schema::getInstance()->supportsSortingInSubquery()) { + // When subqueries aren't sorted, we need to sort the result manually again + $rankingQuery .= " ORDER BY counter"; + + if ($withRollup) { + $rankingQuery .= ', counterRollup'; + } + } + + return $rankingQuery; + } + + /** + * Generate the additional column parts of the ranking query. + * + * @return array{additionalColumns: string, additionalColumnsAggregated: string} + */ + private function generateAdditionalColumnsExpressions(): array + { + $columnsString = ''; + $columnsAggregatedString = ''; + + if ([] !== $this->additionalColumns) { + $columnsToAggregate = []; + + foreach ($this->additionalColumns as $additionalColumn => $aggregation) { + if ($aggregation !== false) { + $columnsToAggregate[] = $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`'; + } else { + $columnsToAggregate[] = '`' . $additionalColumn . '`'; + } + } + + $columnsString = '`' . implode('`, `', array_keys($this->additionalColumns)) . '`'; + $columnsAggregatedString = implode(', ', $columnsToAggregate); + } + + return [ + 'additionalColumns' => $columnsString, + 'additionalColumnsAggregated' => $columnsAggregatedString, + ]; + } + + /** + * Generate the "Others" switch conditions for all label columns. + */ + private function generateLabelColumnsOthersSwitch(int $limit, bool $withRollup): string + { + $isFirstLabelColumn = true; + $switches = []; foreach (array_keys($this->labelColumns) as $column) { $rollupWhen = ''; - if ($withRollup) { - $rollupLimitValue = empty($withRollupColumns) ? - "'" . $this->othersLabelValue . "'" - : - 'NULL'; + if ($withRollup) { + $rollupLimitValue = $isFirstLabelColumn ? "'" . $this->othersLabelValue . "'" : 'NULL'; $rollupWhen = " WHEN counterRollup = $limit THEN $rollupLimitValue WHEN counterRollup > 0 THEN `$column` "; - $withRollupColumns[] = $column; + $isFirstLabelColumn = false; } - $labelColumnsOthersSwitch[] = " + $switches[] = " CASE $rollupWhen WHEN counter = $limit THEN '" . $this->othersLabelValue . "' @@ -320,161 +426,415 @@ public function generateRankingQuery($innerQuery, bool $withRollup = false) END AS `$column` "; } - $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch); - // generate select clauses for additional columns - $additionalColumnsString = ''; - $additionalColumnsAggregatedString = ''; - foreach ($this->additionalColumns as $additionalColumn => $aggregation) { - $additionalColumnsString .= ', `' . $additionalColumn . '`'; - if ($aggregation !== false) { - $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`'; - } else { - $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`'; + return implode(', ', $switches); + } + + /** + * Generate the label column part of the ranking query. + */ + private function generateLabelColumnsString(): string + { + return '`' . implode('`, `', array_keys($this->labelColumns)) . '`'; + } + + /** + * Generate the ranking query counter expressions using variables. + * + * @return array{counter: string, init: string} + */ + private function generateVariableCounterExpressions(int $limit, bool $withRollup): array + { + $inits = []; + $whens = []; + + if ($this->columnToMarkExcludedRows !== false) { + // when a row has been specified that marks which records should be excluded + // from limiting, we don't give those rows the normal counter but -1 times the + // value they had before. this way, they have a separate number space (i.e. negative + // integers). + $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; + } + + if ($withRollup) { + foreach (array_keys($this->labelColumns) as $column) { + $whens[] = "WHEN `$column` IS NULL THEN -1"; } } - // initialize the counters if ($this->partitionColumn !== false) { - $initCounter = ''; + // partition: one counter per possible value foreach ($this->partitionColumnValues as $value) { - $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', '; + $isValue = '`' . $this->partitionColumn . '` = ' . intval($value); + $partitionCounter = '@counter' . intval($value); + + $whens[] = "WHEN $isValue AND $partitionCounter = $limit THEN $limit"; + $whens[] = "WHEN $isValue THEN $partitionCounter := $partitionCounter + 1"; + $inits[] = "( SELECT $partitionCounter := 0 ) initCounter" . intval($value); } + + $whens[] = "ELSE 0"; } else { - $initCounter = '( SELECT @counter:=0 ) initCounter,'; + // no partitioning: add a single counter + $whens[] = "WHEN @counter = $limit THEN $limit"; + $whens[] = "ELSE @counter := @counter + 1"; + + $inits[] = '( SELECT @counter := 0 ) initCounter'; } - $counterRollupExpression = ''; + $init = implode(', ', $inits); + $counter = " + CASE + " . implode(" + ", $whens) . " + END AS counter + "; + + return [ + 'counter' => $counter, + 'init' => $init, + ]; + } - if ($withRollup && !empty($withRollupColumns)) { - $initCounter .= ' ( SELECT @counterRollup:=0 ) initCounterRollup,'; - $counterRollupWhen = ''; + /** + * Generate the rollup counter expressions using variables. + * + * @return array{counter: string, init: string} + */ + private function generateVariableCounterRollupExpressions(int $limit, bool $withRollup): array + { + $counter = ''; + $init = ''; - if (count($withRollupColumns) >= 2) { - $counterRollupWhen = " - WHEN `" . implode('` IS NULL AND `', $withRollupColumns) . "` IS NULL THEN -1 - "; - } + if ($withRollup) { + $rollupColumns = array_keys($this->labelColumns); + + $whens = [ + "WHEN `" . implode('` IS NULL AND `', $rollupColumns) . "` IS NULL THEN -1" + ]; - foreach ($withRollupColumns as $withRollupColumn) { - $counterRollupWhen .= " - WHEN `$withRollupColumn` IS NULL AND @counterRollup = $limit THEN $limit - WHEN `$withRollupColumn` IS NULL THEN @counterRollup := @counterRollup + 1 - "; + foreach ($rollupColumns as $withRollupColumn) { + $whens[] = "WHEN `$withRollupColumn` IS NULL AND @counterRollup = $limit THEN $limit"; + $whens[] = "WHEN `$withRollupColumn` IS NULL THEN @counterRollup := @counterRollup + 1"; } - $counterRollupExpression = " - , CASE - $counterRollupWhen + $init = '( SELECT @counterRollup := 0 ) initCounterRollup'; + $counter = " + CASE + " . implode(" + ", $whens) . " ELSE 0 END AS counterRollup - "; + "; } - if (false === strpos($innerQuery, ' LIMIT ') && !Schema::getInstance()->supportsSortingInSubquery()) { - // Setting a limit for the inner query forces the optimizer to use a temporary table, which uses the sorting - $innerQuery .= ' LIMIT 18446744073709551615'; + return [ + 'counter' => $counter, + 'init' => $init, + ]; + } + + private function generateVariableGroupByExpression(bool $withRollup): string + { + $groupBy = 'counter'; + + if ($withRollup) { + $groupBy .= ', counterRollup'; } - // add a counter to the query - // we rely on the sorting of the inner query - $withCounter = " - SELECT - $labelColumnsString, - $counterExpression AS counter - $counterRollupExpression - $additionalColumnsString - FROM - $initCounter - ( $innerQuery ) actualQuery - "; - - if ($withRollup && !empty($withRollupColumns) && !Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) { - // MariaDB requires an additional sorting layer to return - // the counter/counterRollup values we expect - $rollupColumnSorts = []; + if ($this->partitionColumn !== false) { + $groupBy .= ', `' . $this->partitionColumn . '`'; + } - foreach ($withRollupColumns as $withRollupColumn) { - $rollupColumnSorts[] = "`$withRollupColumn` IS NULL"; - } + return $groupBy; + } - $withCounter .= ' ORDER BY ' . implode(', ', $rollupColumnSorts); - $innerQueryOrderBy = DbHelper::extractOrderByFromQuery($innerQuery); + /** + * Generate the ranking query counter expressions using window functions. + * + * @return array{counter: string, init: string} + */ + private function generateWindowCounterExpressions(string $windowOrderBy, bool $withRollup): array + { + $partitionBy = ''; - if (null !== $innerQueryOrderBy) { - // copy ORDER BY from inner query to rollup sorting - $withCounter .= ', ' . $innerQueryOrderBy; + if ($this->partitionColumn !== false) { + $partitionBy = "PARTITION BY `{$this->partitionColumn}`"; + } + + $excludeWhens = []; + $orderByWhens = []; + + if ($this->columnToMarkExcludedRows !== false) { + $excludeWhens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; + $orderByWhens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN 1 * {$this->columnToMarkExcludedRows}"; + } + + if ($withRollup) { + foreach (array_keys($this->labelColumns) as $column) { + $excludeWhens[] = "WHEN `$column` IS NULL THEN -1"; + $orderByWhens[] = "WHEN `$column` IS NULL THEN 1"; } } - // group by the counter - this groups "Others" because the counter stops at $limit - $groupBy = 'counter'; + if ([] === $orderByWhens) { + $orderBy = $windowOrderBy; + } else { + $orderBy = " + CASE + " . implode(" + ", $orderByWhens) . " + ELSE 0 + END, + $windowOrderBy + "; + } - if ($withRollup && !empty($counterRollupExpression)) { - $groupBy .= ', counterRollup'; + $rowNumberOver = "ROW_NUMBER() OVER ($partitionBy ORDER BY $orderBy)"; + + if ([] === $excludeWhens) { + $counter = " + $rowNumberOver AS counter + "; + } else { + $counter = " + CASE + " . implode(" + ", $excludeWhens) . " + ELSE $rowNumberOver + END AS counter + "; + } + + return [ + 'counter' => $counter, + 'init' => '', + ]; + } + + /** + * Generate the rollup counter expressions using window functions. + * + * @return array{counter: string, init: string} + */ + private function generateWindowCounterRollupExpressions(bool $withRollup, string $withRollupOrderBy): array + { + $counter = ''; + + if ($withRollup) { + $rollupColumns = array_keys($this->labelColumns); + + $orderBy = " + CASE + WHEN `" . implode('` IS NULL AND `', $rollupColumns) . "` IS NULL THEN 1 + WHEN `" . implode('` IS NULL OR `', $rollupColumns) . "` IS NULL THEN 0 + ELSE 1 + END, + $withRollupOrderBy + "; + + $counter = " + CASE + WHEN `" . implode('` IS NULL AND `', $rollupColumns) . "` IS NULL THEN -1 + WHEN `" . implode('` IS NOT NULL AND `', $rollupColumns) . "` IS NOT NULL THEN 0 + ELSE ROW_NUMBER() OVER (ORDER BY $orderBy) + END AS counterRollup + "; + } + + return [ + 'counter' => $counter, + 'init' => '', + ]; + } + + private function generateWindowGroupByExpression(int $limit, bool $withRollup): string + { + $groupBy = " + CASE + WHEN counter >= $limit THEN $limit + ELSE counter + END + "; + + if ($withRollup) { + $groupBy .= ", + CASE + WHEN counterRollup >= $limit THEN $limit + ELSE counterRollup + END + "; } if ($this->partitionColumn !== false) { $groupBy .= ', `' . $this->partitionColumn . '`'; } - $groupOthers = " - SELECT - $labelColumnsOthersSwitch - $additionalColumnsAggregatedString - FROM ( $withCounter ) AS withCounter - GROUP BY $groupBy - "; - if (!Schema::getInstance()->supportsSortingInSubquery()) { - // When subqueries aren't sorted, we need to sort the result manually again - $groupOthers .= " ORDER BY counter"; + return $groupBy; + } + + private function generateWindowOrderByExpression(string $innerQuery, bool $withRollup): string + { + $selectColumns = DbHelper::extractSelectFromQuery($innerQuery); + + if ($withRollup && '*' === $selectColumns) { + // special case for wrapped ROLLUP query + // we find the real columns one level deeper + $outerSelectPos = stripos($innerQuery, 'SELECT'); + $innerSelectPos = stripos($innerQuery, 'SELECT', $outerSelectPos + strlen('SELECT')); + $realInnerQuery = substr($innerQuery, $innerSelectPos); + $selectColumns = DbHelper::extractSelectFromQuery($realInnerQuery); + } + + if (null === $selectColumns || '*' === $selectColumns) { + // order by label columns if we can not find + // the named SELECT columns to order by + return $this->generateLabelColumnsString(); + } - if (!empty($counterRollupExpression)) { - $groupOthers .= ', counterRollup'; + $columns = DbHelper::extractOrderByFromQuery($innerQuery, true); + $columns = $this->prepareColumnsForWindowOrderBy($columns, $selectColumns); + + if (null === $columns) { + $columns = DbHelper::extractGroupByFromQuery($innerQuery, true); + + if (null === $columns && $withRollup) { + // a rollup has the GROUP BY inside the wrapper + $outerSelectPos = stripos($innerQuery, 'SELECT'); + $innerSelectPos = stripos($innerQuery, 'SELECT', $outerSelectPos + strlen('SELECT')); + $lastClosingParenthesis = strrpos($innerQuery, ')'); + + $realInnerQuery = substr($innerQuery, $innerSelectPos, $lastClosingParenthesis - $innerSelectPos); + $columns = DbHelper::extractGroupByFromQuery($realInnerQuery, true); } + + $columns = $this->prepareColumnsForWindowOrderBy($columns, $selectColumns); } - return $groupOthers; + if (null === $columns) { + $columns = $this->generateLabelColumnsString(); + } + + return $columns; } - private function getCounterExpression($limit, bool $withRollup = false) + private function prepareColumnsForWindowOrderBy(?string $expr, string $selectColumns): ?string { - $whens = array(); - - if ($this->columnToMarkExcludedRows !== false) { - // when a row has been specified that marks which records should be excluded - // from limiting, we don't give those rows the normal counter but -1 times the - // value they had before. this way, they have a separate number space (i.e. negative - // integers). - $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; + if (null === $expr) { + return null; } - if ($withRollup) { - foreach (array_keys($this->labelColumns) as $column) { - $whens[] = "WHEN `$column` IS NULL THEN -1"; + $exprColumns = explode(',', $expr); + + foreach ($exprColumns as $i => &$exprColumn) { + $columnParts = explode(' ', trim($exprColumn)); + + if (count($columnParts) > 2) { + // the column contains more than just "column ASC" + // remove the column for safety, we don't know what we are really dealing with + unset($exprColumns[$i]); + continue; + } + + $column = str_replace('`', '', trim($columnParts[0])); + + if (preg_match('/`?' . $column . '`? AS [`"\']?(\w+)[`"\']?(?:,|$)/is', $selectColumns, $matches)) { + // unalias the column to allow usage in window + $column = trim($matches[1]); + $columnParts[0] = '`' . $column . '`'; + $exprColumn = implode(' ', $columnParts); + } + + if (!preg_match('/[`"\']?' . $column . '[`"\']?(?:,|$)/is', $selectColumns)) { + // the column was not found as "column," or "column" in the SELECT part + // we remove it from the window because it otherwise break the query + unset($exprColumns[$i]); } } - if ($this->partitionColumn !== false) { - // partition: one counter per possible value - foreach ($this->partitionColumnValues as $value) { - $isValue = '`' . $this->partitionColumn . '` = ' . intval($value); - $counter = '@counter' . intval($value); - $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit"; - $whens[] = "WHEN $isValue THEN $counter:=$counter+1"; + if ([] === $exprColumns) { + return null; + } + + return implode(', ', $exprColumns); + } + + /** + * Prepare the inner query for usage in the ranking query. + */ + private function prepareInnerQuery(string $query): string + { + if (false === strpos($query, ' LIMIT ') && !Schema::getInstance()->supportsSortingInSubquery()) { + // Setting a limit for the inner query forces the optimizer to use a temporary table, which uses the sorting + $query .= ' LIMIT 18446744073709551615'; + } + + return $query; + } + + /** + * Prepare the query with added counters. + * + * @param array{counter: string, init: string} $counterExpressions + * @param array{counter: string, init: string} $counterRollupExpressions + * @param array{additionalColumns: string, additionalColumnsAggregated: string} $additionalColumnsExpressions + */ + private function prepareWithCounterQuery( + string $innerQuery, + bool $withRollup, + string $labelColumnsString, + array $counterExpressions, + array $counterRollupExpressions, + array $additionalColumnsExpressions + ): string { + $selectString = implode( + ', + ', + array_filter([ + $labelColumnsString, + $counterExpressions['counter'], + $counterRollupExpressions['counter'], + $additionalColumnsExpressions['additionalColumns'], + ]) + ); + + $fromString = implode( + ', + ', + array_filter([ + $counterExpressions['init'], + $counterRollupExpressions['init'], + "( $innerQuery ) actualQuery", + ]) + ); + + // add a counter to the query + // we rely on the sorting of the inner query + $query = " + SELECT + $selectString + FROM + $fromString + "; + + if ($withRollup && !Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) { + // MariaDB requires an additional sorting layer to return + // the counter/counterRollup values we expect + $rollupColumnSorts = []; + + foreach (array_keys($this->labelColumns) as $rollupColumn) { + $rollupColumnSorts[] = "`$rollupColumn` IS NULL"; + } + + $query .= ' ORDER BY ' . implode(', ', $rollupColumnSorts); + $innerQueryOrderBy = DbHelper::extractOrderByFromQuery($innerQuery, true); + + if (null !== $innerQueryOrderBy) { + // copy ORDER BY from inner query to rollup sorting + $query .= ', ' . $innerQueryOrderBy; } - $whens[] = "ELSE 0"; - } else { - // no partitioning: add a single counter - $whens[] = "WHEN @counter = $limit THEN $limit"; - $whens[] = "ELSE @counter:=@counter+1"; } - return " - CASE - " . implode(" - ", $whens) . " - END - "; + return $query; } } diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index c97e2166282..5d9d878ccaa 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -2280,21 +2280,6 @@ parameters: count: 1 path: core/QuickForm2.php - - - message: "#^Else branch is unreachable because previous condition is always true\\.$#" - count: 1 - path: core/RankingQuery.php - - - - message: "#^Property Piwik\\\\RankingQuery\\:\\:\\$columnToMarkExcludedRows \\(string\\) does not accept default value of type false\\.$#" - count: 1 - path: core/RankingQuery.php - - - - message: "#^Unreachable statement \\- code above always terminates\\.$#" - count: 1 - path: core/RankingQuery.php - - message: "#^Parameter \\#1 \\$viewDataTableId of method Piwik\\\\Report\\\\ReportWidgetConfig\\:\\:setDefaultViewDataTable\\(\\) expects string, null given\\.$#" count: 1 diff --git a/tests/PHPUnit/Integration/Db/Schema/MariadbTest.php b/tests/PHPUnit/Integration/Db/Schema/MariadbTest.php index 65c68ceeece..5e1925d1214 100644 --- a/tests/PHPUnit/Integration/Db/Schema/MariadbTest.php +++ b/tests/PHPUnit/Integration/Db/Schema/MariadbTest.php @@ -17,27 +17,54 @@ class MariadbTest extends IntegrationTestCase { /** * @dataProvider getIsOptimizeInnoDBTestData + * @dataProvider getGenericFeatureNotSupportedVersionTestData */ - public function testIsOptimizeInnoDBSupportedReturnsCorrectResult($version, $expectedResult) + public function testIsOptimizeInnoDBSupportedReturnsCorrectResult(string $version, bool $expectedResult): void { $schema = $this->getMockBuilder(Db\Schema\Mariadb::class)->onlyMethods(['getVersion'])->getMock(); $schema->method('getVersion')->willReturn($version); $this->assertEquals($expectedResult, $schema->isOptimizeInnoDBSupported()); } - public function getIsOptimizeInnoDBTestData() + public function getIsOptimizeInnoDBTestData(): array { - return array( - array("10.0.17-MariaDB-1~trusty", false), - array("10.1.1-MariaDB-1~trusty", true), - array("10.2.0-MariaDB-1~trusty", true), - array("10.6.19-0ubuntu0.14.04.1", true), // we expect true, as the version is high enough - array("8.0.11-TiDB-v8.1.0", false), - array("", false), - array("0", false), - array("slkdf(@*#lkesjfMariaDB", false), - array("slkdfjq3rujlkv", false), - ); + return [ + ['10.0.17-MariaDB-1~trusty', false], + ['10.1.1-MariaDB-1~trusty', true], + ['10.2.0-MariaDB-1~trusty', true], + ['10.6.19-0ubuntu0.14.04.1', true], + ]; + } + + /** + * @dataProvider getSupportsWindowFunctionsTestData + * @dataProvider getGenericFeatureNotSupportedVersionTestData + */ + public function testSupportsWindowFunctionsReturnsCorrectResult(string $version, bool $expectedResult): void + { + $schema = $this->getMockBuilder(Db\Schema\Mariadb::class)->onlyMethods(['getVersion'])->getMock(); + $schema->method('getVersion')->willReturn($version); + $this->assertEquals($expectedResult, $schema->supportsWindowFunctions()); + } + + public function getSupportsWindowFunctionsTestData(): array + { + return [ + ['10.1.1-MariaDB-1~trusty', false], + ['10.2.0-MariaDB-1~trusty', true], + ['10.6.19-0ubuntu0.14.04.1', true], + ]; + } + + public function getGenericFeatureNotSupportedVersionTestData(): array + { + return [ + ['8.0.11-TiDB-v8.1.0', false], + ['', false], + ['0', false], + ['slkdf(@*#lkesjfMariaDB', false], + ['slkdfjq3rujlkv', false], + ]; } public function testOptimize() diff --git a/tests/PHPUnit/Integration/Db/Schema/MysqlTest.php b/tests/PHPUnit/Integration/Db/Schema/MysqlTest.php index 7a2fd9eeaa3..57961dc9938 100644 --- a/tests/PHPUnit/Integration/Db/Schema/MysqlTest.php +++ b/tests/PHPUnit/Integration/Db/Schema/MysqlTest.php @@ -17,27 +17,56 @@ class MysqlTest extends IntegrationTestCase { /** * @dataProvider getIsOptimizeInnoDBTestData + * @dataProvider getGenericFeatureNotSupportedVersionTestData */ - public function testIsOptimizeInnoDBSupportedReturnsCorrectResult($version, $expectedResult) + public function testIsOptimizeInnoDBSupportedReturnsCorrectResult(string $version, bool $expectedResult): void { $schema = $this->getMockBuilder(Db\Schema\Mysql::class)->onlyMethods(['getVersion'])->getMock(); $schema->method('getVersion')->willReturn($version); $this->assertEquals($expectedResult, $schema->isOptimizeInnoDBSupported()); } - public function getIsOptimizeInnoDBTestData() + public function getIsOptimizeInnoDBTestData(): array { - return array( - array("10.0.17-MariaDB-1~trusty", false), - array("10.1.1-MariaDB-1~trusty", true), - array("10.2.0-MariaDB-1~trusty", true), - array("10.6.19-0ubuntu0.14.04.1", false), - array("8.0.11-TiDB-v8.1.0", false), - array("", false), - array("0", false), - array("slkdf(@*#lkesjfMariaDB", false), - array("slkdfjq3rujlkv", false), - ); + return [ + ['10.0.17-MariaDB-1~trusty', false], + ['10.1.1-MariaDB-1~trusty', true], + ['10.2.0-MariaDB-1~trusty', true], + ['10.6.19-0ubuntu0.14.04.1', false], + ['8.0.11-TiDB-v8.1.0', false], + ]; + } + + /** + * @dataProvider getSupportsWindowFunctionsTestData + * @dataProvider getGenericFeatureNotSupportedVersionTestData + */ + public function testSupportsWindowFunctionsReturnsCorrectResult(string $version, bool $expectedResult): void + { + $schema = $this->getMockBuilder(Db\Schema\Mysql::class)->onlyMethods(['getVersion'])->getMock(); + $schema->method('getVersion')->willReturn($version); + $this->assertEquals($expectedResult, $schema->supportsWindowFunctions()); + } + + public function getSupportsWindowFunctionsTestData(): array + { + return [ + ['5.5.68-MariaDB-log', false], + ['10.1.1-MariaDB-1~trusty', false], + ['5.7.44-log', false], + ['8.0.36', true], + ['9.4.16', true], + ]; + } + + public function getGenericFeatureNotSupportedVersionTestData(): array + { + return [ + ['', false], + ['0', false], + ['slkdf(@*#lkesjfMariaDB', false], + ['slkdfjq3rujlkv', false], + ]; } public function testOptimize() diff --git a/tests/PHPUnit/Integration/Db/Schema/TidbTest.php b/tests/PHPUnit/Integration/Db/Schema/TidbTest.php index 50a3989aa01..491126cc72b 100644 --- a/tests/PHPUnit/Integration/Db/Schema/TidbTest.php +++ b/tests/PHPUnit/Integration/Db/Schema/TidbTest.php @@ -21,6 +21,12 @@ public function testIsOptimizeInnoDBSupportedReturnsCorrectResult() $this->assertFalse($schema->isOptimizeInnoDBSupported()); } + public function testSupportsWindowFunctionsReturnsCorrectResult(): void + { + $schema = new Db\Schema\Tidb(); + $this->assertTrue($schema->supportsWindowFunctions()); + } + public function testOptimize() { if (!DatabaseConfig::isTiDb()) { diff --git a/tests/PHPUnit/Unit/DbHelperTest.php b/tests/PHPUnit/Unit/DbHelperTest.php index 590a86be9d6..ec88aae9b50 100644 --- a/tests/PHPUnit/Unit/DbHelperTest.php +++ b/tests/PHPUnit/Unit/DbHelperTest.php @@ -22,24 +22,194 @@ class DbHelperTest extends \PHPUnit\Framework\TestCase { /** - * @dataProvider getExtractOrderByFromQueryTestData + * @dataProvider getExtractGroupByFromQueryTestData */ - public function testExtractOrderByFromQuery(string $sql, ?string $expectedOrderBy): void + public function testExtractGroupByFromQuery(string $sql, bool $stripTableNames, ?string $expectedGroupBy): void + { + $extractedGroupBy = DbHelper::extractGroupByFromQuery($sql, $stripTableNames); + + $this->checkQueryExtraction($expectedGroupBy, $extractedGroupBy); + } + + public function getExtractGroupByFromQueryTestData(): iterable { - $extractedOrderBy = DbHelper::extractOrderByFromQuery($sql); + yield 'no clause' => [ + 'SELECT my_column FROM my_table', + false, + null, + ]; + + yield 'simple group by' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one, column_two + ', + false, + 'column_one, column_two', + ]; + + yield 'multiple group by' => [ + ' + SELECT column_one + FROM ( + SELECT column_two + FROM my_table + GROUP BY column_two + ) AS my_data + GROUP BY column_one + ', + false, + 'column_one', + ]; + + yield 'nested group by ignored' => [ + ' + SELECT column_one + FROM ( + SELECT column_two + FROM my_table + GROUP BY column_two + ) AS my_data + ', + false, + null, + ]; + + yield 'nested group by ignored - with rollup' => [ + ' + SELECT column_one + FROM ( + SELECT column_two + FROM my_table + GROUP BY column_two WITH ROLLUP + ) AS my_data + ', + false, + null, + ]; + + yield 'nested group by ignored - having' => [ + ' + SELECT column_one + FROM ( + SELECT column_two + FROM my_table + GROUP BY column_two + HAVING COUNT(column_two) > 0 + ) AS my_data + ', + false, + null, + ]; + + yield 'query terminated by ;' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one; + ', + false, + 'column_one', + ]; + + yield 'group by with following WITH' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one WITH ROLLUP + ', + false, + 'column_one', + ]; + + yield 'group by with following HAVING' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one + HAVING COUNT(column_two) > 0 + ', + false, + 'column_one', + ]; + + yield 'group by with following WINDOW' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one + WINDOW x AS (ORDER BY column_two) + ', + false, + 'column_one', + ]; + + yield 'group by with following ORDER' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one + ORDER BY column_two + ', + false, + 'column_one', + ]; + + yield 'group by with following LIMIT' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one + LIMIT 1 + ', + false, + 'column_one', + ]; - // compare with collapsed whitespace - $expectedOrderBy = trim(preg_replace('/\s+/', ' ', $expectedOrderBy)); - $extractedOrderBy = trim(preg_replace('/\s+/', ' ', $extractedOrderBy)); + yield 'unbalanced parentheses' => [ + 'SELECT my_column FROM my_table GROUP BY column_one, (, column_two', + false, + null, + ]; + + yield 'with stripped table names' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY `my_table`.`column_one`, `column_two` + ', + true, + '`column_one`, `column_two`', + ]; + + yield 'without stripped table names' => [ + ' + SELECT column_one, column_two + FROM my_table + GROUP BY `my_table`.`column_one`, `column_two` + ', + false, + '`my_table`.`column_one`, `column_two`', + ]; + } - $this->assertSame($expectedOrderBy, $extractedOrderBy); + /** + * @dataProvider getExtractOrderByFromQueryTestData + */ + public function testExtractOrderByFromQuery(string $sql, bool $stripTableNames, ?string $expectedOrderBy): void + { + $extractedOrderBy = DbHelper::extractOrderByFromQuery($sql, $stripTableNames); + + $this->checkQueryExtraction($expectedOrderBy, $extractedOrderBy); } public function getExtractOrderByFromQueryTestData(): iterable { yield 'no clause' => [ 'SELECT my_column FROM my_table', - null + false, + null, ]; yield 'simple order by' => [ @@ -49,10 +219,11 @@ public function getExtractOrderByFromQueryTestData(): iterable ORDER BY column_one DESC, column_two ASC ', + false, ' column_one DESC, column_two ASC - ' + ', ]; yield 'multiple order by' => [ @@ -65,7 +236,8 @@ public function getExtractOrderByFromQueryTestData(): iterable ) AS my_data ORDER BY column_one ', - 'column_one' + false, + 'column_one', ]; yield 'nested order by ignored' => [ @@ -77,7 +249,22 @@ public function getExtractOrderByFromQueryTestData(): iterable ORDER BY column_two ) AS my_data ', - null + false, + null, + ]; + + yield 'nested order by with limit ignored' => [ + ' + SELECT column_one + FROM ( + SELECT column_two + FROM my_table + ORDER BY column_two + LIMIT 1 + ) AS my_data + ', + false, + null, ]; yield 'query terminated by ;' => [ @@ -86,22 +273,113 @@ public function getExtractOrderByFromQueryTestData(): iterable FROM my_table ORDER BY column_one DESC; ', - 'column_one DESC' + false, + 'column_one DESC', ]; - yield 'order by with following limit' => [ + yield 'order by with following LIMIT' => [ ' SELECT column_one, column_two FROM my_table ORDER BY column_one LIMIT 1 ', - 'column_one' + false, + 'column_one', ]; yield 'unbalanced parentheses' => [ 'SELECT my_column FROM my_table ORDER BY column_one, (, column_two', - null + false, + null, + ]; + + yield 'with stripped table names' => [ + ' + SELECT column_one, column_two + FROM my_table + ORDER BY `my_table`.`column_one`, `column_two` + ', + true, + '`column_one`, `column_two`', + ]; + + yield 'without stripped table names' => [ + ' + SELECT column_one, column_two + FROM my_table + ORDER BY `my_table`.`column_one`, `column_two` + ', + false, + '`my_table`.`column_one`, `column_two`', + ]; + } + + /** + * @dataProvider getExtractSelectFromQueryTestData + */ + public function testExtractSelectFromQuery(string $sql, ?string $expectedSelect): void + { + $extractedSelect = DbHelper::extractSelectFromQuery($sql); + + $this->checkQueryExtraction($expectedSelect, $extractedSelect); + } + + public function getExtractSelectFromQueryTestData(): iterable + { + yield 'no clause' => [ + 'SET @counter = 1', + null, + ]; + + yield 'minimal select' => [ + 'SELECT @@version', + '@@version', + ]; + + yield 'minimal select terminated by ;' => [ + 'SELECT @@version;', + '@@version', + ]; + + yield 'asterisk' => [ + ' + SELECT * + FROM my_table + ', + '*', + ]; + + yield 'skip comments and optimizer hints at the beginning' => [ + ' + SELECT /*+ OPTIMIZE */ + /* this will be skipped */ + column_one, + /* this will be kept */ + column_two + FROM my_table + ', + ' + column_one, + /* this will be kept */ + column_two + ', + ]; + + yield 'nested select by ignored' => [ + ' + SELECT column_one, column_two AS second_column + FROM ( + SELECT column_three + FROM my_table + ) AS my_data + ', + 'column_one, column_two AS second_column', + ]; + + yield 'unbalanced parentheses' => [ + 'SELECT my_column ( FROM my_table', + null, ]; } @@ -341,4 +619,20 @@ public function getAddOptimizerHintTestData(): iterable 'MAX_EXECUTION_TIME(100)', ]; } + + /** + * Compare two extracted SQL query parts with collapsed whitespace. + */ + private function checkQueryExtraction(?string $expected, ?string $extracted): void + { + if (null !== $expected) { + $expected = trim(preg_replace('/\s+/', ' ', $expected)); + } + + if (null !== $extracted) { + $extracted = trim(preg_replace('/\s+/', ' ', $extracted)); + } + + $this->assertSame($expected, $extracted); + } } diff --git a/tests/PHPUnit/Unit/RankingQueryTest.php b/tests/PHPUnit/Unit/RankingQueryTest.php index d54f4e3dc23..daff843b1e5 100644 --- a/tests/PHPUnit/Unit/RankingQueryTest.php +++ b/tests/PHPUnit/Unit/RankingQueryTest.php @@ -12,23 +12,91 @@ use Piwik\Db\Schema; use Piwik\RankingQuery; +/** + * @group Core + * @group RankingQuery + */ class RankingQueryTest extends \PHPUnit\Framework\TestCase { + public function tearDown(): void + { + Schema::unsetInstance(); + parent::tearDown(); + } + /** - * @group Core + * @dataProvider getBasicTestData + * @dataProvider getBasicTestDataWithRollup + * @dataProvider getExcludeRowsTestData + * @dataProvider getPartitionResultTestData */ - public function testBasic() + public function testRankingQuery( + Schema $mockSchema, + RankingQuery $rankingQuery, + string $innerQuery, + bool $withRollup, + string $expectedQuery + ): void { + Schema::setSingletonInstance($mockSchema); + + $query = $rankingQuery->generateRankingQuery($innerQuery, $withRollup); + + $queryNoWhitespace = preg_replace("/\s+/", "", $query); + $expectedNoWhitespace = preg_replace("/\s+/", "", $expectedQuery); + + $message = 'Unexpected query: ' . $query; + $this->assertEquals($expectedNoWhitespace, $queryNoWhitespace, $message); + } + + public function getBasicTestData(): iterable { - $query = new RankingQuery(); - $query->setOthersLabel('Others'); - $query->addLabelColumn('label'); - $query->addColumn('column'); - $query->addColumn('columnSum', 'sum'); - $query->setLimit(10); + $rankingQuery = new RankingQuery(); + $rankingQuery->setOthersLabel('Others'); + $rankingQuery->addLabelColumn('label'); + $rankingQuery->addColumn('column'); + $rankingQuery->addColumn('columnSum', 'sum'); + $rankingQuery->setLimit(10); - $innerQuery = "SELECT label, column, columnSum FROM myTable"; + $innerQuery = 'SELECT `label`, `column`, `columnSum` FROM `myTable`'; - $expected = " + $expectedQuery = " + SELECT + CASE + WHEN counter = 11 THEN 'Others' + ELSE `label` + END AS `label`, + `column`, + sum(`columnSum`) AS `columnSum` + FROM ( + SELECT + `label`, + ROW_NUMBER() OVER (ORDER BY `label`) AS counter, + `column`, + `columnSum` + FROM + ( SELECT `label`, `column`, `columnSum` FROM `myTable` ) actualQuery + ) AS withCounter + GROUP BY + CASE + WHEN counter >= 11 THEN 11 + ELSE counter + END + "; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(true); + + yield 'basic - window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; + + $expectedQuery = " SELECT CASE WHEN counter = 11 THEN 'Others' @@ -41,70 +109,175 @@ public function testBasic() `label`, CASE WHEN @counter = 11 THEN 11 - ELSE @counter:=@counter+1 + ELSE @counter := @counter + 1 END AS counter, `column`, `columnSum` FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT label, column, columnSum FROM myTable ) actualQuery - ) AS withCounter + ( SELECT @counter := 0 ) initCounter, + ( SELECT `label`, `column`, `columnSum` FROM `myTable` ) actualQuery + ) AS withCounter GROUP BY counter "; - if (!Schema::getInstance()->supportsSortingInSubquery()) { - $expected = " + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); + + yield 'basic - no window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; + + $expectedQuery = " + SELECT + CASE + WHEN counter = 11 THEN 'Others' + ELSE `label` + END AS `label`, + `column`, + sum(`columnSum`) AS `columnSum` + FROM ( SELECT + `label`, CASE - WHEN counter = 11 THEN 'Others' - ELSE `label` - END AS `label`, + WHEN @counter = 11 THEN 11 + ELSE @counter := @counter + 1 + END AS counter, `column`, - sum(`columnSum`) AS `columnSum` - FROM ( - SELECT - `label`, - CASE - WHEN @counter = 11 THEN 11 - ELSE @counter:=@counter+1 - END AS counter, - `column`, - `columnSum` - FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT label, column, columnSum FROM myTable LIMIT 18446744073709551615 ) actualQuery - ) AS withCounter - GROUP BY counter - ORDER BY counter - "; - } - - $this->checkQuery($query, $innerQuery, $expected); + `columnSum` + FROM + ( SELECT @counter := 0 ) initCounter, + ( SELECT `label`, `column`, `columnSum` FROM `myTable` LIMIT 18446744073709551615 ) actualQuery + ) AS withCounter + GROUP BY counter + ORDER BY counter + "; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(false); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); + + yield 'basic - sorting in subquery not supported' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; } - /** - * @group Core - */ - public function testBasicWithRollup() + public function getBasicTestDataWithRollup(): iterable { - $query = new RankingQuery(); - $query->setOthersLabel('Others'); - $query->addLabelColumn('label'); - $query->addLabelColumn('url'); - $query->addColumn('column'); - $query->addColumn('columnSum', 'sum'); - $query->setLimit(10); - - $innerQuery = " + $rankingQuery = new RankingQuery(); + $rankingQuery->setOthersLabel('Others'); + $rankingQuery->addLabelColumn('label'); + $rankingQuery->addLabelColumn('url'); + $rankingQuery->addColumn('column'); + $rankingQuery->addColumn('columnSum', 'sum'); + $rankingQuery->setLimit(10); + + $innerQuery = ' SELECT * FROM ( SELECT `label`, `url`, `column`, `columnSum` FROM `myTable` GROUP BY `label`, `url` WITH ROLLUP ) AS rollupQuery ORDER BY `column` + '; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(true); + + $expectedQuery = " + SELECT + CASE + WHEN counterRollup = 11 THEN 'Others' + WHEN counterRollup > 0 THEN `label` + WHEN counter = 11 THEN 'Others' + ELSE `label` + END AS `label`, + CASE + WHEN counterRollup = 11 THEN NULL + WHEN counterRollup > 0 THEN `url` + WHEN counter = 11 THEN 'Others' + ELSE `url` + END AS `url`, + `column`, + sum(`columnSum`) AS `columnSum` + FROM ( + SELECT + `label`, `url`, + CASE + WHEN `label` IS NULL THEN -1 + WHEN `url` IS NULL THEN -1 + ELSE ROW_NUMBER() OVER ( + ORDER BY + CASE + WHEN `label` IS NULL THEN 1 + WHEN `url` IS NULL THEN 1 + ELSE 0 + END, + `column` + ) + END AS counter, + CASE + WHEN `label` IS NULL AND `url` IS NULL THEN -1 + WHEN `label` IS NOT NULL AND `url` IS NOT NULL THEN 0 + ELSE ROW_NUMBER() OVER ( + ORDER BY + CASE + WHEN `label` IS NULL AND `url` IS NULL THEN 1 + WHEN `label` IS NULL OR `url` IS NULL THEN 0 + ELSE 1 + END, + `column` + ) + END AS counterRollup, + `column`, + `columnSum` + FROM + ( + SELECT * FROM ( + SELECT `label`, `url`, `column`, `columnSum` + FROM `myTable` + GROUP BY `label`, `url` WITH ROLLUP + ) AS rollupQuery + ORDER BY `column` + ) actualQuery + ) AS withCounter + GROUP BY + CASE + WHEN counter >= 11 THEN 11 + ELSE counter + END, + CASE + WHEN counterRollup >= 11 THEN 11 + ELSE counterRollup + END "; - $expected = " + yield 'basic with rollup - window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + true, + $expectedQuery, + ]; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); + + $expectedQuery = " SELECT CASE WHEN counterRollup = 11 THEN 'Others' @@ -127,7 +300,7 @@ public function testBasicWithRollup() WHEN `label` IS NULL THEN -1 WHEN `url` IS NULL THEN -1 WHEN @counter = 11 THEN 11 - ELSE @counter:=@counter+1 + ELSE @counter := @counter + 1 END AS counter, CASE WHEN `label` IS NULL AND `url` IS NULL THEN -1 @@ -140,8 +313,8 @@ public function testBasicWithRollup() `column`, `columnSum` FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT @counterRollup:=0 ) initCounterRollup, + ( SELECT @counter := 0 ) initCounter, + ( SELECT @counterRollup := 0 ) initCounterRollup, ( SELECT * FROM ( SELECT `label`, `url`, `column`, `columnSum` @@ -150,134 +323,158 @@ public function testBasicWithRollup() ) AS rollupQuery ORDER BY `column` ) actualQuery - ) AS withCounter + ) AS withCounter GROUP BY counter, counterRollup "; - if (!Schema::getInstance()->supportsSortingInSubquery()) { - $expected = " - SELECT + yield 'basic with rollup - no window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + true, + $expectedQuery, + ]; + + $expectedQuery = " + SELECT + CASE + WHEN counterRollup = 11 THEN 'Others' + WHEN counterRollup > 0 THEN `label` + WHEN counter = 11 THEN 'Others' + ELSE `label` + END AS `label`, + CASE + WHEN counterRollup = 11 THEN NULL + WHEN counterRollup > 0 THEN `url` + WHEN counter = 11 THEN 'Others' + ELSE `url` + END AS `url`, + `column`, + sum(`columnSum`) AS `columnSum` + FROM ( + SELECT + `label`, `url`, CASE - WHEN counterRollup = 11 THEN 'Others' - WHEN counterRollup > 0 THEN `label` - WHEN counter = 11 THEN 'Others' - ELSE `label` - END AS `label`, + WHEN `label` IS NULL THEN -1 + WHEN `url` IS NULL THEN -1 + WHEN @counter = 11 THEN 11 + ELSE @counter := @counter + 1 + END AS counter, CASE - WHEN counterRollup = 11 THEN NULL - WHEN counterRollup > 0 THEN `url` - WHEN counter = 11 THEN 'Others' - ELSE `url` - END AS `url`, + WHEN `label` IS NULL AND `url` IS NULL THEN -1 + WHEN `label` IS NULL AND @counterRollup = 11 THEN 11 + WHEN `label` IS NULL THEN @counterRollup := @counterRollup + 1 + WHEN `url` IS NULL AND @counterRollup = 11 THEN 11 + WHEN `url` IS NULL THEN @counterRollup := @counterRollup + 1 + ELSE 0 + END AS counterRollup, `column`, - sum(`columnSum`) AS `columnSum` - FROM ( - SELECT - `label`, `url`, - CASE - WHEN `label` IS NULL THEN -1 - WHEN `url` IS NULL THEN -1 - WHEN @counter = 11 THEN 11 - ELSE @counter:=@counter+1 - END AS counter, - CASE - WHEN `label` IS NULL AND `url` IS NULL THEN -1 - WHEN `label` IS NULL AND @counterRollup = 11 THEN 11 - WHEN `label` IS NULL THEN @counterRollup := @counterRollup + 1 - WHEN `url` IS NULL AND @counterRollup = 11 THEN 11 - WHEN `url` IS NULL THEN @counterRollup := @counterRollup + 1 - ELSE 0 - END AS counterRollup, - `column`, - `columnSum` - FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT @counterRollup:=0 ) initCounterRollup, - ( - SELECT * FROM ( - SELECT `label`, `url`, `column`, `columnSum` - FROM `myTable` - GROUP BY `label`, `url` WITH ROLLUP - ) AS rollupQuery - ORDER BY `column` - LIMIT 18446744073709551615 - ) actualQuery + `columnSum` + FROM + ( SELECT @counter := 0 ) initCounter, + ( SELECT @counterRollup := 0 ) initCounterRollup, + ( + SELECT * FROM ( + SELECT `label`, `url`, `column`, `columnSum` + FROM `myTable` + GROUP BY `label`, `url` WITH ROLLUP + ) AS rollupQuery + ORDER BY `column` + LIMIT 18446744073709551615 + ) actualQuery ) AS withCounter - GROUP BY counter, counterRollup - ORDER BY counter, counterRollup - "; - } + GROUP BY counter, counterRollup + ORDER BY counter, counterRollup + "; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(false); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); - if (!Schema::getInstance()->supportsRankingRollupWithoutExtraSorting()) { - $expected = " + yield 'basic with rollup - sorting in subquery not supported' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + true, + $expectedQuery, + ]; + + $expectedQuery = " + SELECT + CASE + WHEN counterRollup = 11 THEN 'Others' + WHEN counterRollup > 0 THEN `label` + WHEN counter = 11 THEN 'Others' + ELSE `label` + END AS `label`, + CASE + WHEN counterRollup = 11 THEN NULL + WHEN counterRollup > 0 THEN `url` + WHEN counter = 11 THEN 'Others' + ELSE `url` + END AS `url`, + `column`, + sum(`columnSum`) AS `columnSum` + FROM ( SELECT + `label`, `url`, CASE - WHEN counterRollup = 11 THEN 'Others' - WHEN counterRollup > 0 THEN `label` - WHEN counter = 11 THEN 'Others' - ELSE `label` - END AS `label`, + WHEN `label` IS NULL THEN -1 + WHEN `url` IS NULL THEN -1 + WHEN @counter = 11 THEN 11 + ELSE @counter := @counter + 1 + END AS counter, CASE - WHEN counterRollup = 11 THEN NULL - WHEN counterRollup > 0 THEN `url` - WHEN counter = 11 THEN 'Others' - ELSE `url` - END AS `url`, + WHEN `label` IS NULL AND `url` IS NULL THEN -1 + WHEN `label` IS NULL AND @counterRollup = 11 THEN 11 + WHEN `label` IS NULL THEN @counterRollup := @counterRollup + 1 + WHEN `url` IS NULL AND @counterRollup = 11 THEN 11 + WHEN `url` IS NULL THEN @counterRollup := @counterRollup + 1 + ELSE 0 + END AS counterRollup, `column`, - sum(`columnSum`) AS `columnSum` - FROM ( - SELECT - `label`, `url`, - CASE - WHEN `label` IS NULL THEN -1 - WHEN `url` IS NULL THEN -1 - WHEN @counter = 11 THEN 11 - ELSE @counter:=@counter+1 - END AS counter, - CASE - WHEN `label` IS NULL AND `url` IS NULL THEN -1 - WHEN `label` IS NULL AND @counterRollup = 11 THEN 11 - WHEN `label` IS NULL THEN @counterRollup := @counterRollup + 1 - WHEN `url` IS NULL AND @counterRollup = 11 THEN 11 - WHEN `url` IS NULL THEN @counterRollup := @counterRollup + 1 - ELSE 0 - END AS counterRollup, - `column`, - `columnSum` - FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT @counterRollup:=0 ) initCounterRollup, - ( - SELECT * FROM ( - SELECT `label`, `url`, `column`, `columnSum` - FROM `myTable` - GROUP BY `label`, `url` WITH ROLLUP - ) AS rollupQuery - ORDER BY `column` - ) actualQuery + `columnSum` + FROM + ( SELECT @counter := 0 ) initCounter, + ( SELECT @counterRollup := 0 ) initCounterRollup, + ( + SELECT * FROM ( + SELECT `label`, `url`, `column`, `columnSum` + FROM `myTable` + GROUP BY `label`, `url` WITH ROLLUP + ) AS rollupQuery + ORDER BY `column` + ) actualQuery ORDER BY `label` IS NULL, `url` IS NULL, `column` ) AS withCounter - GROUP BY counter, counterRollup - "; - } + GROUP BY counter, counterRollup + "; - $this->checkQuery($query, $innerQuery, $expected, true); + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(false); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); + + yield 'basic with rollup - ranking query without extra sorting not supported' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + true, + $expectedQuery, + ]; } - /** - * @group Core - */ - public function testExcludeRows() + public function getExcludeRowsTestData(): iterable { + $rankingQuery = new RankingQuery(20); + $rankingQuery->setOthersLabel('Others'); + $rankingQuery->addLabelColumn('label'); + $rankingQuery->setColumnToMarkExcludedRows('exclude_marker'); - $query = new RankingQuery(20); - $query->setOthersLabel('Others'); - $query->addLabelColumn('label'); - $query->setColumnToMarkExcludedRows('exclude_marker'); - - $innerQuery = "SELECT label, 1 AS exclude_marker FROM myTable"; + $innerQuery = "SELECT `label`, 1 AS exclude_marker FROM myTable"; - $expected = " + $expectedQuery = " SELECT CASE WHEN counter = 21 THEN 'Others' @@ -289,65 +486,122 @@ public function testExcludeRows() `label`, CASE WHEN exclude_marker != 0 THEN -1 * exclude_marker - WHEN @counter = 21 THEN 21 - ELSE @counter:=@counter+1 + ELSE ROW_NUMBER() OVER ( + ORDER BY + CASE + WHEN exclude_marker != 0 THEN 1 * exclude_marker + ELSE 0 + END, + `label` + ) END AS counter, `exclude_marker` FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT label, 1 AS exclude_marker FROM myTable ) actualQuery - ) AS withCounter - GROUP BY counter + ( SELECT `label`, 1 AS exclude_marker FROM myTable ) actualQuery + ) AS withCounter + GROUP BY + CASE + WHEN counter >= 21 THEN 21 + ELSE counter + END "; - if (!Schema::getInstance()->supportsSortingInSubquery()) { - $expected = " + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(true); + + yield 'exclude rows - window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; + + $expectedQuery = " + SELECT + CASE + WHEN counter = 21 THEN 'Others' + ELSE `label` + END AS `label`, + `exclude_marker` + FROM ( SELECT + `label`, CASE - WHEN counter = 21 THEN 'Others' - ELSE `label` - END AS `label`, + WHEN exclude_marker != 0 THEN -1 * exclude_marker + WHEN @counter = 21 THEN 21 + ELSE @counter := @counter + 1 + END AS counter, `exclude_marker` - FROM ( - SELECT - `label`, - CASE - WHEN exclude_marker != 0 THEN -1 * exclude_marker - WHEN @counter = 21 THEN 21 - ELSE @counter:=@counter+1 - END AS counter, - `exclude_marker` - FROM - ( SELECT @counter:=0 ) initCounter, - ( SELECT label, 1 AS exclude_marker FROM myTable LIMIT 18446744073709551615 ) actualQuery + FROM + ( SELECT @counter := 0 ) initCounter, + ( SELECT `label`, 1 AS exclude_marker FROM myTable ) actualQuery ) AS withCounter - GROUP BY counter - ORDER BY counter - "; - } - - $this->checkQuery($query, $innerQuery, $expected); - - $query = new RankingQuery('20'); - $query->setOthersLabel('Others'); - $query->addLabelColumn('label'); - $query->setColumnToMarkExcludedRows('exclude_marker'); - $this->checkQuery($query, $innerQuery, $expected); + GROUP BY counter + "; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); + + yield 'exclude rows - no window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; } - /** - * @group Core - */ - public function testPartitionResult() + public function getPartitionResultTestData(): iterable { - $query = new RankingQuery(1000); - $query->setOthersLabel('Others'); - $query->addLabelColumn('label'); - $query->partitionResultIntoMultipleGroups('partition', array(1, 2, 3)); + $rankingQuery = new RankingQuery(1000); + $rankingQuery->setOthersLabel('Others'); + $rankingQuery->addLabelColumn('label'); + $rankingQuery->partitionResultIntoMultipleGroups('partition', [1, 2, 3]); - $innerQuery = "SELECT label, partition FROM myTable"; + $innerQuery = "SELECT `label`, `partition` FROM `myTable`"; - $expected = " + $expectedQuery = " + SELECT + CASE + WHEN counter = 1001 THEN 'Others' + ELSE `label` + END AS `label`, + `partition` + FROM ( + SELECT + `label`, + ROW_NUMBER() OVER (PARTITION BY `partition` ORDER BY `label`) AS counter, + `partition` + FROM + ( SELECT `label`, `partition` FROM `myTable` ) actualQuery + ) AS withCounter + GROUP BY + CASE + WHEN counter >= 1001 THEN 1001 + ELSE counter + END, + `partition` + "; + + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(true); + + yield 'partition result - window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; + + $expectedQuery = " SELECT CASE WHEN counter = 1001 THEN 'Others' @@ -359,75 +613,176 @@ public function testPartitionResult() `label`, CASE WHEN `partition` = 1 AND @counter1 = 1001 THEN 1001 - WHEN `partition` = 1 THEN @counter1:=@counter1+1 + WHEN `partition` = 1 THEN @counter1 := @counter1 + 1 WHEN `partition` = 2 AND @counter2 = 1001 THEN 1001 - WHEN `partition` = 2 THEN @counter2:=@counter2+1 + WHEN `partition` = 2 THEN @counter2 := @counter2 + 1 WHEN `partition` = 3 AND @counter3 = 1001 THEN 1001 - WHEN `partition` = 3 THEN @counter3:=@counter3+1 + WHEN `partition` = 3 THEN @counter3 := @counter3 + 1 ELSE 0 END AS counter, `partition` FROM - ( SELECT @counter1:=0 ) initCounter1, - ( SELECT @counter2:=0 ) initCounter2, - ( SELECT @counter3:=0 ) initCounter3, - ( SELECT label, partition FROM myTable ) actualQuery - ) AS withCounter + ( SELECT @counter1 := 0 ) initCounter1, + ( SELECT @counter2 := 0 ) initCounter2, + ( SELECT @counter3 := 0 ) initCounter3, + ( SELECT `label`, `partition` FROM `myTable` ) actualQuery + ) AS withCounter GROUP BY counter, `partition` "; - if (!Schema::getInstance()->supportsSortingInSubquery()) { - $expected = " - SELECT - CASE - WHEN counter = 1001 THEN 'Others' - ELSE `label` - END AS `label`, - `partition` - FROM ( - SELECT - `label`, - CASE - WHEN `partition` = 1 AND @counter1 = 1001 THEN 1001 - WHEN `partition` = 1 THEN @counter1:=@counter1+1 - WHEN `partition` = 2 AND @counter2 = 1001 THEN 1001 - WHEN `partition` = 2 THEN @counter2:=@counter2+1 - WHEN `partition` = 3 AND @counter3 = 1001 THEN 1001 - WHEN `partition` = 3 THEN @counter3:=@counter3+1 - ELSE 0 - END AS counter, - `partition` - FROM - ( SELECT @counter1:=0 ) initCounter1, - ( SELECT @counter2:=0 ) initCounter2, - ( SELECT @counter3:=0 ) initCounter3, - ( SELECT label, partition FROM myTable LIMIT 18446744073709551615 ) actualQuery - ) AS withCounter - GROUP BY counter, `partition` - ORDER BY counter - "; - } + $mockSchema = $this->createMock(Schema::class); + $mockSchema->method('supportsRankingRollupWithoutExtraSorting')->willReturn(true); + $mockSchema->method('supportsSortingInSubquery')->willReturn(true); + $mockSchema->method('supportsWindowFunctions')->willReturn(false); - $this->checkQuery($query, $innerQuery, $expected); + yield 'partition result - no window functions' => [ + $mockSchema, + $rankingQuery, + $innerQuery, + false, + $expectedQuery, + ]; } /** - * @param RankingQuery $rankingQuery - * @param string $innerQuerySql - * @param string $expected + * @dataProvider getGenerateWindowOrderByStringTestData */ - private function checkQuery( + public function testGenerateWindowOrderByString( RankingQuery $rankingQuery, - string $innerQuerySql, - string $expected, - bool $withRollup = false - ) { - $query = $rankingQuery->generateRankingQuery($innerQuerySql, $withRollup); + string $innerQuery, + bool $withRollup, + string $expectedOrderBy + ): void { + $reflection = new \ReflectionClass($rankingQuery); + $method = $reflection->getMethod('generateWindowOrderByExpression'); + $method->setAccessible(true); - $queryNoWhitespace = preg_replace("/\s+/", "", $query); - $expectedNoWhitespace = preg_replace("/\s+/", "", $expected); + $windowOrderBy = $method->invokeArgs($rankingQuery, [$innerQuery, $withRollup]); - $message = 'Unexpected query: ' . $query; - $this->assertEquals($queryNoWhitespace, $expectedNoWhitespace, $message); + $this->assertEquals($expectedOrderBy, $windowOrderBy); + } + + public function getGenerateWindowOrderByStringTestData(): iterable + { + $rankingQuery = new RankingQuery(1); + $rankingQuery->addLabelColumn('label_1'); + $rankingQuery->addLabelColumn('label_2'); + + $reflection = new \ReflectionClass($rankingQuery); + $method = $reflection->getMethod('generateLabelColumnsString'); + $method->setAccessible(true); + + $labelColumnsString = $method->invokeArgs($rankingQuery, []); + + yield 'SELECT extraction fails' => [ + $rankingQuery, + 'SET @counter = 1', + false, + $labelColumnsString, + ]; + + yield 'SELECT extraction fails - with rollup' => [ + $rankingQuery, + 'SELECT * FROM (SET @counter = 1)', + true, + $labelColumnsString, + ]; + + yield 'window matches ORDER BY' => [ + $rankingQuery, + 'SELECT column_one, column_two FROM my_table GROUP BY column_two ORDER BY column_one ASC', + false, + 'column_one ASC', + ]; + + yield 'window matches ORDER BY - rollup' => [ + $rankingQuery, + ' + SELECT * + FROM ( + SELECT column_one, column_two + FROM my_table + GROUP BY column_two WITH ROLLUP + ) AS rollupQuery + ORDER BY column_one ASC + ', + true, + 'column_one ASC', + ]; + + yield 'window matches GROUP BY' => [ + $rankingQuery, + ' + SELECT column_one, column_two + FROM my_table + GROUP BY column_one + ', + false, + 'column_one', + ]; + + yield 'window matches GROUP BY - rollup' => [ + $rankingQuery, + ' + SELECT * + FROM ( + SELECT column_one, column_two + FROM my_table + GROUP BY column_one WITH ROLLUP + ) AS rollupQuery + ', + true, + 'column_one', + ]; + + yield 'unselected column is removed' => [ + $rankingQuery, + 'SELECT column_one FROM my_table ORDER BY column_one, column_two', + false, + 'column_one', + ]; + + yield 'column aliases are resolved' => [ + $rankingQuery, + 'SELECT column_one AS column_new FROM my_table ORDER BY column_one', + false, + '`column_new`', + ]; + + yield 'column aliases are resolved - backtick quoted' => [ + $rankingQuery, + 'SELECT column_one AS `column_new` FROM my_table ORDER BY column_one', + false, + '`column_new`', + ]; + + yield 'column aliases are resolved - double quoted' => [ + $rankingQuery, + 'SELECT column_one AS "column_new" FROM my_table ORDER BY column_one', + false, + '`column_new`', + ]; + + yield 'column aliases are resolved - single quoted' => [ + $rankingQuery, + "SELECT column_one AS 'column_new' FROM my_table ORDER BY column_one", + false, + '`column_new`', + ]; + + yield 'column aliases are resolved - rollup' => [ + $rankingQuery, + ' + SELECT * + FROM ( + SELECT column_one AS column_new + FROM my_table + GROUP BY column_two WITH ROLLUP + ) AS rollupQuery + ORDER BY column_new + ', + true, + 'column_new', + ]; } }