From 1ef710ff76392ca135514e54b836cc4ff67e7608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Fri, 11 Oct 2024 16:30:37 +0200 Subject: [PATCH 01/31] Data liberation: Kickoff the project --- .../playground/data-liberation/project.json | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/playground/data-liberation/project.json b/packages/playground/data-liberation/project.json index 43d1d5cf146..9ccbdfa3fce 100644 --- a/packages/playground/data-liberation/project.json +++ b/packages/playground/data-liberation/project.json @@ -3,5 +3,25 @@ "$schema": "../../../node_modules/nx/schemas/project-schema.json", "sourceRoot": "packages/playground/data-liberation", "projectType": "library", - "targets": {} + "targets": { + "test": { + "executor": "nx:run-commands", + "options": { + "cwd": "packages/playground/data-liberation", + "commands": ["phpunit -c ./phpunit.xml"], + "parallel": false + } + }, + "test:watch": { + "executor": "nx:run-commands", + "options": { + "cwd": "packages/playground/data-liberation", + "commands": [ + "phpunit -c ./phpunit.xml", + "fswatch -o ./**/*.php | xargs -n1 -I{} phpunit -c ./phpunit.xml" + ], + "parallel": false + } + } + } } From 234a8bf90fe2ca1ef69e649e6fefc6dc70f5bfbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 13 Oct 2024 21:12:01 +0200 Subject: [PATCH 02/31] Port the URL rewriters from adamziel/site-transfer-protocol --- .gitignore | 2 + .../bin/regenerate_public_suffix_list.php | 41 + .../data-liberation/bin/rewrite-urls.php | 74 + .../playground/data-liberation/bootstrap.php | 40 + .../playground/data-liberation/composer.json | 20 + .../playground/data-liberation/composer.lock | 388 + .../playground/data-liberation/phpunit.xml | 11 + .../playground/data-liberation/src/README.md | 109 + .../src/WP_Block_Markup_Processor.php | 388 + .../src/WP_Block_Markup_Url_Processor.php | 286 + .../WP_Migration_URL_In_Text_Processor.php | 435 + 
.../playground/data-liberation/src/WP_URL.php | 66 + .../data-liberation/src/functions.php | 294 + .../src/public_suffix_list.php | 1457 +++ .../tests/URLParserWHATWGComplianceTests.php | 133 + .../tests/WPBlockMarkupProcessorTests.php | 353 + .../tests/WPBlockMarkupUrlProcessorTests.php | 191 + .../WPMigrationURLInTextProcessorTests.php | 163 + .../tests/WPRewriteUrlsTests.php | 149 + .../tests/whatwg_url_test_data.json | 9801 +++++++++++++++++ packages/playground/wordpress/src/index.ts | 2 +- 21 files changed, 14402 insertions(+), 1 deletion(-) create mode 100644 packages/playground/data-liberation/bin/regenerate_public_suffix_list.php create mode 100644 packages/playground/data-liberation/bin/rewrite-urls.php create mode 100644 packages/playground/data-liberation/bootstrap.php create mode 100644 packages/playground/data-liberation/composer.json create mode 100644 packages/playground/data-liberation/composer.lock create mode 100644 packages/playground/data-liberation/phpunit.xml create mode 100644 packages/playground/data-liberation/src/README.md create mode 100644 packages/playground/data-liberation/src/WP_Block_Markup_Processor.php create mode 100644 packages/playground/data-liberation/src/WP_Block_Markup_Url_Processor.php create mode 100644 packages/playground/data-liberation/src/WP_Migration_URL_In_Text_Processor.php create mode 100644 packages/playground/data-liberation/src/WP_URL.php create mode 100644 packages/playground/data-liberation/src/functions.php create mode 100644 packages/playground/data-liberation/src/public_suffix_list.php create mode 100644 packages/playground/data-liberation/tests/URLParserWHATWGComplianceTests.php create mode 100644 packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php create mode 100644 packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php create mode 100644 packages/playground/data-liberation/tests/WPMigrationURLInTextProcessorTests.php create mode 100644 
packages/playground/data-liberation/tests/WPRewriteUrlsTests.php create mode 100644 packages/playground/data-liberation/tests/whatwg_url_test_data.json diff --git a/.gitignore b/.gitignore index f9bc1bc23b7..2c123c0b787 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ packages/docs/site/src/model.json .docusaurus dist.zip rollup.d.ts +.phpunit.cache +packages/playground/data-liberation/vendor # dependencies node_modules diff --git a/packages/playground/data-liberation/bin/regenerate_public_suffix_list.php b/packages/playground/data-liberation/bin/regenerate_public_suffix_list.php new file mode 100644 index 00000000000..502be3be2c5 --- /dev/null +++ b/packages/playground/data-liberation/bin/regenerate_public_suffix_list.php @@ -0,0 +1,41 @@ + 1,\n"); +} + +fwrite($fp, ");\n"); + +if(file_exists($php_file_path)) { + unlink($php_file_path); +} +rename($new_php_file_path, $php_file_path); diff --git a/packages/playground/data-liberation/bin/rewrite-urls.php b/packages/playground/data-liberation/bin/rewrite-urls.php new file mode 100644 index 00000000000..a3a6c7af448 --- /dev/null +++ b/packages/playground/data-liberation/bin/rewrite-urls.php @@ -0,0 +1,74 @@ + --file --current-site-url --new-site-url \n"; + echo "Commands:\n"; + echo " list_urls: List all the URLs found in the input file.\n"; + echo " migrate_urls: Migrate all the URLs found in the input file from the current site to the target site.\n"; + exit( 1 ); +} + +$command = $argv[1]; +$options = []; + +for ( $i = 2; $i < $argc; $i ++ ) { + if ( str_starts_with( $argv[ $i ], '--' ) && isset( $argv[ $i + 1 ] ) ) { + $options[ substr( $argv[ $i ], 2 ) ] = $argv[ $i + 1 ]; + $i ++; + } +} + +if ( ! isset( $options['file'] ) ) { + echo "The file option is required.\n"; + exit( 1 ); +} + +$inputFile = $options['file']; +if ( ! 
file_exists( $inputFile ) ) { + echo "The file $inputFile does not exist.\n"; + exit( 1 ); +} +$block_markup = file_get_contents( $inputFile ); + +// @TODO: Decide – should the current site URL be always required to +// populate $base_url? +$base_url = $options['current-site-url'] ?? 'https://playground.internal'; +$p = new WP_Block_Markup_Url_Processor( $block_markup, $base_url ); + +switch ( $command ) { + case 'list_urls': + echo "URLs found in the markup:\n\n"; + wp_list_urls_in_block_markup( [ 'block_markup' => $block_markup, 'base_url' => $base_url ]); + echo "\n"; + break; + case 'migrate_urls': + if ( ! isset( $options['current-site-url'] ) ) { + echo "The --current-site-url option is required for the migrate_urls command.\n"; + exit( 1 ); + } + if ( ! isset( $options['new-site-url'] ) ) { + echo "The --new-site-url option is required for the migrate_urls command.\n"; + exit( 1 ); + } + + echo "Replacing $base_url with " . $options['new-site-url'] . " in the input.\n\n"; + if (!is_dir('./assets')) { + mkdir('./assets/', 0777, true); + } + $result = wp_migrate_post_content_urls( array( + 'block_markup' => $block_markup, + 'base_url' => $base_url, + 'current-site-url' => $options['current-site-url'], + 'new-site-url' => $options['new-site-url'], + 'local-assets-path' => './assets/' + ) ); + if(!is_string($result)) { + echo "Error! 
\n"; + print_r($result); + exit( 1 ); + } + echo $result; + break; +} diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php new file mode 100644 index 00000000000..11f2649f77c --- /dev/null +++ b/packages/playground/data-liberation/bootstrap.php @@ -0,0 +1,40 @@ +=8.0.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Log\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "Common interface for logging libraries", + "homepage": "https://github.com/php-fig/log", + "keywords": [ + "log", + "psr", + "psr-3" + ], + "support": { + "source": "https://github.com/php-fig/log/tree/3.0.0" + }, + "time": "2021-07-14T16:46:02+00:00" + }, + { + "name": "rowbot/idna", + "version": "0.1.5", + "source": { + "type": "git", + "url": "https://github.com/TRowbotham/idna.git", + "reference": "02d94e4caa435547d2027086c57c69d34e6ed4fa" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/TRowbotham/idna/zipball/02d94e4caa435547d2027086c57c69d34e6ed4fa", + "reference": "02d94e4caa435547d2027086c57c69d34e6ed4fa", + "shasum": "" + }, + "require": { + "php": ">=7.1", + "rowbot/punycode": "^1.0", + "symfony/polyfill-intl-normalizer": "^1.18" + }, + "require-dev": { + "guzzlehttp/guzzle": "^6.5 || ^7.0", + "phpstan/phpstan": "^1.2", + "phpstan/phpstan-deprecation-rules": "^1.0", + "phpstan/phpstan-strict-rules": "^1.0", + "phpunit/phpunit": "^7.0 || ^8.0 || ^9.0", + "squizlabs/php_codesniffer": "^3.5.1", + "symfony/cache": "^4.3 || ^5.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Rowbot\\Idna\\": "src/", + "Rowbot\\Idna\\Resource\\": "resources/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + 
"name": "Trevor Rowbotham" + } + ], + "description": "An implementation of UTS#46 Unicode IDNA Compatibility Processing.", + "keywords": [ + "idn", + "idna", + "international domain names", + "iri", + "tr46", + "unicode", + "uts46" + ], + "support": { + "issues": "https://github.com/TRowbotham/idna/issues", + "source": "https://github.com/TRowbotham/idna/tree/0.1.5" + }, + "time": "2022-01-10T19:51:24+00:00" + }, + { + "name": "rowbot/punycode", + "version": "1.0.4", + "source": { + "type": "git", + "url": "https://github.com/TRowbotham/punycode.git", + "reference": "ec116e99cb089a25d45147813d61bfa2deb29328" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/TRowbotham/punycode/zipball/ec116e99cb089a25d45147813d61bfa2deb29328", + "reference": "ec116e99cb089a25d45147813d61bfa2deb29328", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "require-dev": { + "ext-mbstring": "*", + "phpstan/phpstan": "^1.2", + "phpstan/phpstan-deprecation-rules": "^1.0", + "phpstan/phpstan-strict-rules": "^1.0", + "phpunit/phpunit": "^7.0 || ^8.0 || ^9.0", + "squizlabs/php_codesniffer": "^3.5.1" + }, + "type": "library", + "autoload": { + "psr-4": { + "Rowbot\\Punycode\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Trevor Rowbotham", + "homepage": "https://trowbotham.com", + "role": "Developer" + } + ], + "description": "A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA).", + "keywords": [ + "punycode", + "rfc-3492", + "rfc3492" + ], + "support": { + "issues": "https://github.com/TRowbotham/punycode/issues", + "source": "https://github.com/TRowbotham/punycode/tree/1.0.4" + }, + "time": "2024-05-03T00:56:10+00:00" + }, + { + "name": "rowbot/url", + "version": "4.0.0", + "source": { + "type": "git", + "url": "https://github.com/TRowbotham/URL-Parser.git", + "reference": "b81e4b4f55ba4fdefaaedebf22d5a060080809f4" + }, + "dist": { + 
"type": "zip", + "url": "https://api.github.com/repos/TRowbotham/URL-Parser/zipball/b81e4b4f55ba4fdefaaedebf22d5a060080809f4", + "reference": "b81e4b4f55ba4fdefaaedebf22d5a060080809f4", + "shasum": "" + }, + "require": { + "brick/math": "^0.8.13 || ^0.9", + "ext-mbstring": "*", + "php": ">=8.1", + "psr/log": "^3.0", + "rowbot/idna": "^0.1.5" + }, + "require-dev": { + "guzzlehttp/guzzle": "^7.0", + "phpstan/phpstan": "^1.9", + "phpstan/phpstan-deprecation-rules": "^1.0", + "phpstan/phpstan-strict-rules": "^1.0", + "phpunit/phpunit": "^10.0 || ^11.0", + "slevomat/coding-standard": "^8.0", + "squizlabs/php_codesniffer": "^3.7.1", + "symfony/cache": "^5.0 || ^6.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Rowbot\\URL\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Trevor Rowbotham", + "homepage": "https://trowbotham.com", + "role": "Developer" + } + ], + "description": "A WHATWG URL spec compliant URL parser for working with URLs and their query strings.", + "keywords": [ + "WHATWG", + "query-string", + "querystring", + "uri", + "url", + "url-parser", + "url-parsing" + ], + "support": { + "issues": "https://github.com/TRowbotham/URL-Parser/issues", + "source": "https://github.com/TRowbotham/URL-Parser/tree/4.0.0" + }, + "time": "2024-06-21T01:00:47+00:00" + }, + { + "name": "symfony/polyfill-intl-normalizer", + "version": "v1.30.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-intl-normalizer.git", + "reference": "a95281b0be0d9ab48050ebd988b967875cdb9fdb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-intl-normalizer/zipball/a95281b0be0d9ab48050ebd988b967875cdb9fdb", + "reference": "a95281b0be0d9ab48050ebd988b967875cdb9fdb", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "suggest": { + "ext-intl": "For best performance" + }, + "type": "library", + "extra": { + "thanks": { + "name": 
"symfony/polyfill", + "url": "https://github.com/symfony/polyfill" + } + }, + "autoload": { + "files": [ + "bootstrap.php" + ], + "psr-4": { + "Symfony\\Polyfill\\Intl\\Normalizer\\": "" + }, + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for intl's Normalizer class and related functions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "intl", + "normalizer", + "polyfill", + "portable", + "shim" + ], + "support": { + "source": "https://github.com/symfony/polyfill-intl-normalizer/tree/v1.30.0" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2024-05-31T15:07:36+00:00" + } + ], + "packages-dev": [], + "aliases": [], + "minimum-stability": "stable", + "stability-flags": [], + "prefer-stable": false, + "prefer-lowest": false, + "platform": [], + "platform-dev": [], + "plugin-api-version": "2.3.0" +} diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml new file mode 100644 index 00000000000..5788cd5dd43 --- /dev/null +++ b/packages/playground/data-liberation/phpunit.xml @@ -0,0 +1,11 @@ + + + + + tests/WPRewriteUrlsTests.php + + + + diff --git a/packages/playground/data-liberation/src/README.md b/packages/playground/data-liberation/src/README.md new file mode 100644 index 00000000000..ca875411fa6 --- /dev/null +++ b/packages/playground/data-liberation/src/README.md @@ -0,0 +1,109 @@ +## Site Transfer Protocol prototype + +This is an exploration of what could become the WordPress 
+Site Transfer Protocol: + +https://core.trac.wordpress.org/ticket/60375 + +The current version is focused on finding and rewriting +URLs as well as downloading any related assets. + +Inspect tricky-input.html and tricky-output.html to +see what this repo can do today. + +The next steps here would be to flesh out this README more, +start some issues and discussions, and define the minimal +v1 to ship and stress-test in Playground. + +### Current status + +- URL rewriting works to perhaps the greatest extent it ever did. + There are still corner-cases to discuss and performance optimizations + to reap before making any of this a public API. +- The URL parser requires PHP 8.1. This is fine for some Playground applications, + but we'll need more compatibility to get any of this into WordPress core. Also, that + parser uses dozens of classes when we could likely rework it into something as + self-contained as the WP_HTML_Tag_Processor. +- Downloading the assets isn't implemented yet. It feels like there's + no way to even start doing it without a state tracking table as there + might be way more images to download than the PHP time limit allows. + Perhaps that doesn't need to be solved to start using this with Markdown. + +#### Processing tricky inputs + +When this code is fed into the migrator: + +```html + +

+ + 🚀-science.com/science has the best scientific articles on the internet! We're also available via the punycode URL: + + + https://xn---science-7f85g.com/%73%63ience/. + + + This isn't migrated: https://🚀-science.comcast/science
+ Or this: super-🚀-science.com/science +

+ + + + + + + + + + +``` + +This actual output is produced: + +```html + +

+ + science.wordpress.com has the best scientific articles on the internet! We're also available via the punycode URL: + + + https://science.wordpress.com/. + + + This isn't migrated: https://🚀-science.comcast/science
+ Or this: super-🚀-science.com/science +

+ + + + + + + + + + +``` + +### Related projects + +- https://github.com/WordPress/wordpress-playground/discussions/1524 +- https://github.com/adamziel/wxr-normalize +- https://github.com/WordPress/blueprints/pull/52 +- https://github.com/adamziel/playground-docs-workflow +- https://github.com/WordPress/blueprints-library/blob/trunk/src/WordPress/AsyncHttp/Client.php +- https://github.com/WordPress/wordpress-develop/pull/6713 + +### What code do I run? + +To migrate domain names from tricky-input.html, run: + +```shell +php bin/rewrite-urls.php migrate_urls --file ./tricky-input.html --current-site-url https://🚀-science.com/science --new-site-url https://science.wordpress.com +``` + +To list all the URLs found in that file, run: + +```shell +php bin/rewrite-urls.php list_urls --file ./tricky-input.html +``` diff --git a/packages/playground/data-liberation/src/WP_Block_Markup_Processor.php b/packages/playground/data-liberation/src/WP_Block_Markup_Processor.php new file mode 100644 index 00000000000..c578ffe032f --- /dev/null +++ b/packages/playground/data-liberation/src/WP_Block_Markup_Processor.php @@ -0,0 +1,388 @@ +accessible_text_starts_at = $reflection->getProperty( 'text_starts_at' ); + $this->accessible_text_starts_at->setAccessible( true ); + + $this->accessible_text_length = $reflection->getProperty( 'text_length' ); + $this->accessible_text_length->setAccessible( true ); + + $this->accessible_lexical_updates = $reflection->getProperty( 'lexical_updates' ); + $this->accessible_lexical_updates->setAccessible( true ); + } + + + public function get_token_type() { + switch ( $this->parser_state ) { + case self::STATE_COMMENT: + if ( null !== $this->block_name ) { + return '#block-comment'; + } + + return '#comment'; + + default: + return parent::get_token_type(); + } + } + + public function get_modifiable_text() { + if ( null === $this->modifiable_text ) { + $this->modifiable_text = parent::get_modifiable_text(); + } + + return $this->modifiable_text; + } + + 
/** + * @param mixed $new_value New text for the current #text, #comment, or #cdata-section token. + * + * @return bool True when the new text was stored, false when it was rejected. + */ + public function set_modifiable_text( $new_value ) { + switch ( parent::get_token_type() ) { + case '#text': + break; + + case '#comment': + case '#cdata-section': + if ( + parent::get_token_type() === '#comment' && ( + strpos( $new_value, '-->' ) !== false || + strpos( $new_value, '--!>' ) !== false + ) + ) { + _doing_it_wrong( + __METHOD__, + __( 'Cannot set a comment closer as a text of an HTML comment.' ), + 'WP_VERSION' + ); + + return false; + } + if ( + $this->get_token_type() === '#cdata-section' && + strpos( $new_value, '>' ) !== false + ) { + _doing_it_wrong( + __METHOD__, + __( 'Cannot set a CDATA closer as text of an HTML CDATA-lookalike section.' ), + 'WP_VERSION' + ); + + return false; + } + + break; + default: + _doing_it_wrong( + __METHOD__, + __( 'Cannot set text content on a non-text node.' ), + 'WP_VERSION' + ); + + return false; + } + + $this->modifiable_text_updated = true; + $this->modifiable_text = $new_value; + + return true; + } + + /** + * Returns the name of the block if the current token is a block comment. 
+ * + * @return string|false + */ + public function get_block_name() { + if ( null === $this->block_name ) { + return false; + } + + return $this->block_name; + } + + public function get_block_attributes() { + if ( null === $this->block_attributes ) { + return false; + } + + return $this->block_attributes; + } + + public function is_block_closer() { + return $this->block_name !== null && $this->block_closer === true; + } + + public function next_token() { + $this->get_updated_html(); + + $this->block_name = null; + $this->block_attributes = null; + $this->block_closer = false; + $this->block_attributes_updated = false; + $this->modifiable_text = null; + $this->modifiable_text_updated = false; + $this->block_attributes_iterator = null; // Drop the previous token's iterator so next_block_attribute() builds a fresh one. + if ( parent::next_token() === false ) { + return false; + } + + if ( parent::get_token_type() !== '#comment' ) { + return true; + } + + $text = parent::get_modifiable_text(); + // Try to parse as a block. The block parser won't cut it because + // while it can parse blocks, it has no semantics for rewriting the + // block markup. Let's do our best here: + $at = strspn( $text, " \t\f\r\n" ); // Whitespace. Double quotes so that PHP interprets the escape sequences. + + if ( $at >= strlen( $text ) ) { + // This is an empty comment. Not a block. + return true; + } + + // Block closers start with the solidus character (`/`) + if ( '/' === $text[ $at ] ) { + $this->block_closer = true; + ++ $at; + } + + // Blocks start with wp: + if ( ! ( + $at + 3 < strlen( $text ) && + $text[ $at ] === 'w' && + $text[ $at + 1 ] === 'p' && + $text[ $at + 2 ] === ':' + ) ) { + return true; + } + + $name_starts_at = $at; + + // Skip wp: + $at += 3; + + // Parse the actual block name after wp: + $name_length = strspn( $text, 'abcdefghijklmnopqrstuwxvyzABCDEFGHIJKLMNOPRQSTUWXVYZ0123456789_-', $at ); + if ( $name_length === 0 ) { + // This wasn't a block after all, just a regular comment. 
+ return true; + } + $name = substr( $text, $name_starts_at, $name_length + 3 ); + $at += $name_length; + + // Skip the whitespace that follows the block name + $at += strspn( $text, " \t\f\r\n", $at ); + if ( $at >= strlen( $text ) ) { + // It's a block without attributes. + $this->block_name = $name; + + return true; + } + + // It seems we may have block attributes here. + + // Block closers cannot have attributes. + if ( $this->block_closer ) { + return true; + } + + // Let's try to parse them as JSON. + $json_maybe = substr( $text, $at ); + $attributes = json_decode( $json_maybe, true ); + if ( null === $attributes || ! is_array( $attributes ) ) { + // This comment looked like a block comment, but the attributes didn't + // parse as a JSON array. This means it wasn't a block after all. + return true; + } + + // We have a block name and a valid attributes array. We may not find a block + // closer, but let's assume it is a block and process it as such. + // @TODO: Confirm that WordPress block parser would have parsed this as a block. + $this->block_name = $name; + $this->block_attributes = $attributes; + + return true; + } + + public function get_updated_html() { + $this->block_attribute_updates_to_modifiable_text_updates(); + $new_text_length = $this->modifiable_text_updates_to_lexical_updates(); + $text_starts_at = $this->accessible_text_starts_at->getValue( $this ); + + $updated_html = parent::get_updated_html(); + + if ( false !== $new_text_length ) { + /** + * Correct the invalid text indices moved by WP_HTML_Tag_Processor when + * updating the modifiable text. + * @TODO: Fix that directly in the WP_HTML_Tag_Processor. + */ + $this->accessible_text_length->setValue( $this, $new_text_length ); + $this->accessible_text_starts_at->setValue( $this, $text_starts_at ); + } + + return $updated_html; + } + + private function block_attribute_updates_to_modifiable_text_updates() { + // Apply block attribute updates, if any. + if ( ! 
$this->block_attributes_updated ) { + return false; + } + $this->set_modifiable_text( + ' ' . + $this->block_name . ' ' . + json_encode( + $this->block_attributes_iterator + ? $this->block_attributes_iterator->getSubIterator( 0 )->getArrayCopy() + : $this->block_attributes, + JSON_HEX_TAG | // Convert < and > to \u003C and \u003E + JSON_HEX_AMP // Convert & to \u0026 + ) + . ' ' + ); + + return true; + } + + private function modifiable_text_updates_to_lexical_updates() { + /** + * Applies modifiable text updates, if any. + * + * Don't do this at home :-) Changes access to private properties of the + * WP_HTML_Tag_Processor class to enable changing the text content of a + * node. + */ + if ( ! $this->modifiable_text_updated ) { + return false; + } + + $new_value = $this->get_modifiable_text(); + switch ( parent::get_token_type() ) { + case '#text': + $lexical_updates_now = $this->accessible_lexical_updates->getValue( $this ); + $lexical_updates_now[] = new WP_HTML_Text_Replacement( + $this->accessible_text_starts_at->getValue( $this ), + $this->accessible_text_length->getValue( $this ), + htmlspecialchars( $new_value, ENT_XML1, 'UTF-8' ) + ); + $this->accessible_lexical_updates->setValue( $this, $lexical_updates_now ); + break; + + case '#comment': + case '#cdata-section': + $lexical_updates_now = $this->accessible_lexical_updates->getValue( $this ); + $lexical_updates_now[] = new WP_HTML_Text_Replacement( + $this->accessible_text_starts_at->getValue( $this ), + $this->accessible_text_length->getValue( $this ), + $new_value + ); + $this->accessible_lexical_updates->setValue( $this, $lexical_updates_now ); + break; + + default: + return false; + } + $this->modifiable_text_updated = false; + + return strlen( $new_value ); + } + + public function next_block_attribute() { + if ( '#block-comment' !== $this->get_token_type() ) { + return false; + } + + if ( null === $this->block_attributes_iterator ) { + $block_attributes = $this->get_block_attributes(); + if ( ! 
is_array( $block_attributes ) ) { + return false; + } + // Re-entrant iteration over the block attributes. + $this->block_attributes_iterator = new \RecursiveIteratorIterator( + new \RecursiveArrayIterator( $block_attributes ), + \RecursiveIteratorIterator::SELF_FIRST + ); + } + + while ( true ) { + $this->block_attributes_iterator->next(); + if ( ! $this->block_attributes_iterator->valid() ) { + break; + } + return true; + } + + return false; + } + + public function get_block_attribute_key() { + if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { + return false; + } + + return $this->block_attributes_iterator->key(); + } + + public function get_block_attribute_value() { + if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { + return false; + } + + return $this->block_attributes_iterator->current(); + } + + public function set_block_attribute_value( $new_value ) { + if ( null === $this->block_attributes_iterator || false === $this->block_attributes_iterator->valid() ) { + return false; + } + + $this->block_attributes_iterator->getSubIterator( + $this->block_attributes_iterator->getDepth() + )->offsetSet( + $this->get_block_attribute_key(), + $new_value + ); + $this->block_attributes_updated = true; + + return true; + } + +} diff --git a/packages/playground/data-liberation/src/WP_Block_Markup_Url_Processor.php b/packages/playground/data-liberation/src/WP_Block_Markup_Url_Processor.php new file mode 100644 index 00000000000..9bea4ef513a --- /dev/null +++ b/packages/playground/data-liberation/src/WP_Block_Markup_Url_Processor.php @@ -0,0 +1,286 @@ +base_url = $base_url; + } + + public function get_updated_html() { + if ( $this->url_in_text_node_updated ) { + $this->set_modifiable_text( $this->url_in_text_processor->get_updated_text() ); + $this->url_in_text_node_updated = false; + } + + return parent::get_updated_html(); + } + + public function get_raw_url() { + 
return $this->raw_url; + } + + public function get_parsed_url() { + return $this->parsed_url; + } + + public function next_token() { + $this->get_updated_html(); + + $this->raw_url = null; + $this->parsed_url = null; + $this->inspected_url_attribute_idx = - 1; + $this->url_in_text_processor = null; + // Do not reset url_in_text_node_updated – it's reset in get_updated_html() which + // is called in parent::next_token(). + + return parent::next_token(); + } + + public function next_url() { + do { + if ( $this->next_url_in_current_token() ) { + return true; + } + } while ( $this->next_token() !== false ); + + return false; + } + + public function next_url_in_current_token() { + $this->raw_url = null; + switch ( parent::get_token_type() ) { + case '#tag': + return $this->next_url_attribute(); + case '#block-comment': + return $this->next_url_block_attribute(); + case '#text': + return $this->next_url_in_text_node(); + default: + return false; + } + } + + private function next_url_in_text_node() { + if ( $this->get_token_type() !== '#text' ) { + return false; + } + + if ( null === $this->url_in_text_processor ) { + /* + * Use the base URL for URLs matched in text nodes. This is the only + * way to recognize a substring "WordPress.org" as a URL. We might + * get some false positives this way, e.g. in this string: + * + * > And that's how you build a theme.Now let's take a look at..." + * + * `theme.Now` would be recognized as a URL. It's up to the API consumer + * to filter out such false positives e.g. by checking the domain against + * a list of accepted domains, or the TLD against a list of public suffixes. 
+ */ + $this->url_in_text_processor = new WP_Migration_URL_In_Text_Processor( $this->get_modifiable_text(), $this->base_url ); + } + + while ( $this->url_in_text_processor->next_url() ) { + $this->raw_url = $this->url_in_text_processor->get_raw_url(); + $this->parsed_url = $this->url_in_text_processor->get_parsed_url(); + + return true; + } + + return false; + } + + private function next_url_attribute() { + $tag = $this->get_tag(); + if ( + ! array_key_exists( $tag, self::URL_ATTRIBUTES ) && + $tag !== 'INPUT' // type=image => src, + ) { + return false; + } + + while ( ++ $this->inspected_url_attribute_idx < count( self::URL_ATTRIBUTES[ $tag ] ) ) { + $attr = self::URL_ATTRIBUTES[ $tag ][ $this->inspected_url_attribute_idx ]; + if ( false === $attr ) { + return false; + } + + $url_maybe = $this->get_attribute( $attr ); + /* + * Use base URL to resolve known URI attributes as we are certain we're + * dealing with URI values. + * With a base URL, the string "plugins.php" in <a href="plugins.php"> will + * be correctly recognized as a URL. + * Without a base URL, this Processor would incorrectly skip it. + */ + if ( is_string( $url_maybe ) ) { + $parsed_url = WP_URL::parse( $url_maybe, $this->base_url ); + if ( false !== $parsed_url ) { + $this->raw_url = $url_maybe; + $this->parsed_url = $parsed_url; + + return true; + } + } + } + + return false; + } + + private function next_url_block_attribute() { + while ( $this->next_block_attribute() ) { + $url_maybe = $this->get_block_attribute_value(); + /* + * Do not use a base URL for block attributes, to avoid false positives. + * When a base URL is present, any word is a valid URL relative to the + * base URL. + * When a base URL is missing, the string must start with a protocol to + * be considered a URL. 
+ */ + if ( is_string( $url_maybe ) ) { + $parsed_url = WP_URL::parse( $url_maybe ); + if ( false !== $parsed_url ) { + $this->raw_url = $url_maybe; + $this->parsed_url = $parsed_url; + + return true; + } + } + } + + return false; + } + + public function set_raw_url( $new_url ) { + if ( null === $this->raw_url ) { + return false; + } + switch ( parent::get_token_type() ) { + case '#tag': + $attr = $this->get_inspected_attribute_name(); + if ( false === $attr ) { + return false; + } + $this->set_attribute( $attr, $new_url ); + + return true; + + case '#block-comment': + return $this->set_block_attribute_value( $new_url ); + + case '#text': + if ( null === $this->url_in_text_processor ) { + return false; + } + $this->url_in_text_node_updated = true; + + return $this->url_in_text_processor->set_raw_url( $new_url ); + } + } + + public function get_inspected_attribute_name() { + if ( '#tag' !== $this->get_token_type() ) { + return false; + } + + $tag = $this->get_tag(); + if ( ! array_key_exists( $tag, self::URL_ATTRIBUTES ) ) { + return false; + } + + if ( + $this->inspected_url_attribute_idx < 0 || + $this->inspected_url_attribute_idx >= count( self::URL_ATTRIBUTES[ $tag ] ) + ) { + return false; + } + + return self::URL_ATTRIBUTES[ $tag ][ $this->inspected_url_attribute_idx ]; + } + + + /** + * A list of HTML attributes meant to contain URLs, as defined in the HTML specification. + * It includes some deprecated attributes like `lowsrc` and `highsrc` for the `IMG` element. + * + * See https://html.spec.whatwg.org/multipage/indices.html#attributes-1. + * See https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value. 
+ * + */ + public const URL_ATTRIBUTES = [ + 'A' => [ 'href' ], + 'APPLET' => [ 'codebase', 'archive' ], + 'AREA' => [ 'href' ], + 'AUDIO' => [ 'src' ], + 'BASE' => [ 'href' ], + 'BLOCKQUOTE' => [ 'cite' ], + 'BODY' => [ 'background' ], + 'BUTTON' => [ 'formaction' ], + 'COMMAND' => [ 'icon' ], + 'DEL' => [ 'cite' ], + 'EMBED' => [ 'src' ], + 'FORM' => [ 'action' ], + 'FRAME' => [ 'longdesc', 'src' ], + 'HEAD' => [ 'profile' ], + 'HTML' => [ 'manifest' ], + 'IFRAME' => [ 'longdesc', 'src' ], + // SVG element + 'IMAGE' => [ 'href' ], + 'IMG' => [ 'longdesc', 'src', 'usemap', 'lowsrc', 'highsrc' ], + 'INPUT' => [ 'src', 'usemap', 'formaction' ], + 'INS' => [ 'cite' ], + 'LINK' => [ 'href' ], + 'OBJECT' => [ 'classid', 'codebase', 'data', 'usemap' ], + 'Q' => [ 'cite' ], + 'SCRIPT' => [ 'src' ], + 'SOURCE' => [ 'src' ], + 'TRACK' => [ 'src' ], + 'VIDEO' => [ 'poster', 'src' ], + ]; + + /** + * @TODO: Either explicitly support these attributes, or explicitly drop support for + * handling their subsyntax. A generic URL matcher might be good enough. + */ + public const URL_ATTRIBUTES_WITH_SUBSYNTAX = [ + '*' => [ 'style' ], // background(), background-image() + 'APPLET' => [ 'archive' ], + 'IMG' => [ 'srcset' ], + 'META' => [ 'content' ], + 'SOURCE' => [ 'srcset' ], + 'OBJECT' => [ 'archive' ], + ]; + + /** + * Also + * + * @TODO: Either explicitly support these tags, or explicitly drop support for + * handling their subsyntax. A generic URL matcher might be good enough. 
+ */ + public const URL_CONTAINING_TAGS_WITH_SUBSYNTAX = [ + 'STYLE', + 'SCRIPT', + ]; + +} diff --git a/packages/playground/data-liberation/src/WP_Migration_URL_In_Text_Processor.php b/packages/playground/data-liberation/src/WP_Migration_URL_In_Text_Processor.php new file mode 100644 index 00000000000..d032555601f --- /dev/null +++ b/packages/playground/data-liberation/src/WP_Migration_URL_In_Text_Processor.php @@ -0,0 +1,435 @@ + Więcej na łąka.pl + * + * Would yield `łąka.pl` + * + * ### Paths + * + * The path is limited to ASCII characters, as per the URL specification. + * For example, scanning the text: + * + * > Visit the WordPress plugins directory https://w.org/plugins?łąka=1 + * + * Would yield `https://w.org/plugins?`, not `https://w.org/plugins?łąka=1`. + * However, scanning this text: + * + * > Visit the WordPress plugins directory https://w.org/plugins?%C5%82%C4%85ka=1 + * + * Would yield `https://w.org/plugins?%C5%82%C4%85ka=1`. + * + * ### Parenthesis treatment + * + * This scanner captures parentheses as a part of the path, query, or fragment, except + * when they're seen as the last character in the URL. For example, scanning the text: + * + * > Visit the WordPress plugins directory (https://w.org/plugins) + * + * Would yield `https://w.org/plugins`, but scanning the text: + * + * > Visit the WordPress plugins directory (https://w.org/plug(in)s + * + * Would yield `https://w.org/plug(in)s`. + * + */ +class WP_Migration_URL_In_Text_Processor { + + private $text; + private $url_starts_at; + private $url_length; + private $bytes_already_parsed = 0; + /** + * @var string + */ + private $raw_url; + /** + * @var URL + */ + private $parsed_url; + private $did_prepend_protocol; + /** + * The base URL for the parsing algorithm. + * See https://url.spec.whatwg.org/. + * + * @var mixed|null + */ + private $base_url; + private $base_protocol; + + /** + * The regular expression pattern used for the matchin URL candidates + * from the text. 
+ * + * @var string + */ + private $regex; + + /** + * @see WP_HTML_Tag_Processor + */ + private $lexical_updates = array(); + + /** + * @var bool + * A flag to indicate whether the URL matching should be strict or not. + * If set to true, the matching will be strict, meaning it will only match URLs that strictly adhere to the pattern. + * If set to false, the matching will be more lenient, allowing for potential false positives. + */ + private $strict = false; + static private $public_suffix_list; + + + public function __construct( $text, $base_url = null ) { + if ( ! self::$public_suffix_list ) { + // @TODO: Parse wildcards and exceptions from the public suffix list. + self::$public_suffix_list = require_once __DIR__ . '/public_suffix_list.php'; + } + $this->text = $text; + $this->base_url = $base_url; + $this->base_protocol = $base_url ? parse_url( $base_url, PHP_URL_SCHEME ) : null; + + $prefix = $this->strict ? '^' : ''; + $suffix = $this->strict ? '$' : ''; + + // Source: https://github.com/vstelmakh/url-highlight/blob/master/src/Matcher/Matcher.php + $this->regex = '/' . $prefix . ' + (?: # scheme + (?https?:)? # Only consider http and https + \/\/ # The protocol does not have to be there, but when + # it is, is must be followed by \/\/ + )? + (?: # userinfo + (?: + (?<=\/{2}) # prefixed with \/\/ + | # or + (?=[^\p{Sm}\p{Sc}\p{Sk}\p{P}]) # start with not: mathematical, currency, modifier symbol, punctuation + ) + (?[^\s<>@\/]+) # not: whitespace, < > @ \/ + @ # at + )? + (?=[^\p{Z}\p{Sm}\p{Sc}\p{Sk}\p{C}\p{P}]) # followed by valid host char + (?| # host + (? # host prefixed by scheme or userinfo (less strict) + (?<=\/\/|@) # prefixed with \/\/ or @ + (?=[^\-]) # label start, not: - + (?:[^\p{Z}\p{Sm}\p{Sc}\p{Sk}\p{C}\p{P}]|-){1,63} # label not: whitespace, mathematical, currency, modifier symbol, control point, punctuation | except - + (?<=[^\-]) # label end, not: - + (?: # more label parts + \. 
+ (?=[^\-]) # label start, not: - + (?(?:[^\p{Z}\p{Sm}\p{Sc}\p{Sk}\p{C}\p{P}]|-){1,63}) # label not: whitespace, mathematical, currency, modifier symbol, control point, punctuation | except - + (?<=[^\-]) # label end, not: - + )* + ) + | # or + (? # host with tld (no scheme or userinfo) + (?=[^\-]) # label start, not: - + (?:[^\p{Z}\p{Sm}\p{Sc}\p{Sk}\p{C}\p{P}]|-){1,63} # label not: whitespace, mathematical, currency, modifier symbol, control point, punctuation | except - + (?<=[^\-]) # label end, not: - + (?: # more label parts + \. + (?=[^\-]) # label start, not: - + (?:[^\p{Z}\p{Sm}\p{Sc}\p{Sk}\p{C}\p{P}]|-){1,63} # label not: whitespace, mathematical, currency, modifier symbol, control point, punctuation | except - + (?<=[^\-]) # label end, not: - + )* + \.(?\w{2,63}) # tld + ) + ) + (?:\:(?\d+))? # port + (? # path, query, fragment + [\/?] # prefixed with \/ or ? + [^\s<>]* # any chars except whitespace and <> + (?<=[^\s<>({\[`!;:\'".,?«»“”‘’]) # end with not a space or some punctuation chars + )? + ' . $suffix . '/ixuJ'; + } + + /** + * @return string + */ + public function next_url() { + $this->raw_url = null; + $this->parsed_url = null; + $this->url_starts_at = null; + $this->url_length = null; + $this->did_prepend_protocol = false; + while ( true ) { + /** + * Thick sieve – eagerly match things that look like URLs but turn out to not be URLs in the end. + */ + $matches = []; + $found = preg_match( $this->regex, $this->text, $matches, PREG_OFFSET_CAPTURE, $this->bytes_already_parsed ); + if ( 1 !== $found ) { + return false; + } + + $matched_url = $matches[0][0]; + if ( + $matched_url[ strlen( $matched_url ) - 1 ] === ')' || + $matched_url[ strlen( $matched_url ) - 1 ] === '.' + ) { + $matched_url = substr( $matched_url, 0, - 1 ); + } + $this->bytes_already_parsed = $matches[0][1] + strlen( $matched_url ); + + $had_double_slash = WP_URL::has_double_slash( $matched_url ); + + $url_to_parse = $matched_url; + if ( $this->base_url && $this->base_protocol && ! 
$had_double_slash ) { + $url_to_parse = WP_URL::ensure_protocol( $url_to_parse, $this->base_protocol ); + $this->did_prepend_protocol = true; + } + + /* + * Extra fine sieve – parse the candidates using a WHATWG-compliant parser to rule out false positives. + */ + $parsed_url = WP_URL::parse( $url_to_parse, $this->base_url ); + if ( false === $parsed_url ) { + continue; + } + + // Additional rigor for URLs that are not explicitly preceded by a double slash. + if ( ! $had_double_slash ) { + /* + * Skip TLDs that are not in the public suffix. + * This reduces false positives like `index.html` or `plugins.php`. + * + * See https://publicsuffix.org/. + */ + $last_dot_position = strrpos( $parsed_url->hostname, '.' ); + if ( false === $last_dot_position ) { + /* + * Oh, there was no dot in the hostname AND no double slash at + * the beginning! Let's assume this isn't a valid URL and move on. + * @TODO: Explore updating the regular expression above to avoid matching + * URLs without a dot in the hostname when they're not preceeded + * by a protocol. + */ + continue; + } + + $tld = substr( $parsed_url->hostname, $last_dot_position + 1 ); + if ( empty( self::$public_suffix_list[ $tld ] ) ) { + // This TLD is not in the public suffix list. It's not a valid domain name. 
+ continue; + } + } + + $this->parsed_url = $parsed_url; + $this->raw_url = $matched_url; + $this->url_starts_at = $matches[0][1]; + $this->url_length = strlen( $matches[0][0] ); + + return true; + } + } + + public function get_raw_url() { + if ( null === $this->raw_url ) { + return false; + } + + return $this->raw_url; + } + + public function get_parsed_url() { + if ( null === $this->parsed_url ) { + return false; + } + + return $this->parsed_url; + } + + public function set_raw_url( $new_url ) { + if ( null === $this->raw_url ) { + return false; + } + if ( $this->did_prepend_protocol ) { + $new_url = substr( $new_url, strpos( $new_url, '://' ) + 3 ); + } + $this->raw_url = $new_url; + $this->lexical_updates[ $this->url_starts_at ] = new WP_HTML_Text_Replacement( + $this->url_starts_at, + $this->url_length, + $new_url + ); + + return true; + } + + private function apply_lexical_updates() { + if ( ! count( $this->lexical_updates ) ) { + return 0; + } + + /* + * Attribute updates can be enqueued in any order but updates + * to the document must occur in lexical order; that is, each + * replacement must be made before all others which follow it + * at later string indices in the input document. + * + * Sorting avoid making out-of-order replacements which + * can lead to mangled output, partially-duplicated + * attributes, and overwritten attributes. + */ + + ksort( $this->lexical_updates ); + + $bytes_already_copied = 0; + $output_buffer = ''; + foreach ( $this->lexical_updates as $diff ) { + $shift = strlen( $diff->text ) - $diff->length; + + // Adjust the cursor position by however much an update affects it. 
+ if ( $diff->start < $this->bytes_already_parsed ) { + $this->bytes_already_parsed += $shift; + } + + $output_buffer .= substr( $this->text, $bytes_already_copied, $diff->start - $bytes_already_copied ); + if ( $diff->start === $this->url_starts_at ) { + $this->url_starts_at = strlen( $output_buffer ); + $this->url_length = strlen( $diff->text ); + } + $output_buffer .= $diff->text; + $bytes_already_copied = $diff->start + $diff->length; + } + + $this->text = $output_buffer . substr( $this->text, $bytes_already_copied ); + $this->lexical_updates = array(); + } + + public function get_updated_text() { + $this->apply_lexical_updates(); + + return $this->text; + } + + /** + * Characters that are forbidden in the host part of a URL. + * See https://url.spec.whatwg.org/#host-miscellaneous. + */ + private const FORBIDDEN_HOST_BYTES = "\x00\x09\x0a\x0d\x20\x23\x2f\x3a\x3c\x3e\x3f\x40\x5b\x5c\x5d\x5e\x7c"; + private const FORBIDDEN_DOMAIN_BYTES = "\x00\x09\x0a\x0d\x20\x23\x25\x2f\x3a\x3c\x3e\x3f\x40\x5b\x5c\x5d\x5e\x7c\x7f"; + /** + * Unlike RFC 3986, the WHATWG URL specification does not the domain part of + * a URL to any length. That being said, we apply an arbitrary limit here as + * an optimization to avoid scanning the entire text for a domain name. + * + * Rationale: Domains larger than 1KB are extremely rare. The WHATWG URL + */ + private const CONSIDER_DOMAINS_UP_TO_BYTES = 1024; + + /** + * An exploration to match URLs without using regular expressions. + * Need to benchmark and rigorously test the current next_url() + * implementation. We may either: + * + * * Be fine with preg_match in next_url() + * * Need a custom implementation like this one + * * Be forced to ditch this approach entirely and find a way to plug + * in a proper WHATWG-compliant URL parser into the task of finding + * URLs in text. This may or may not be possible/viable. 
+ * @wip + */ + private function experimental_next_url_without_regexs() { + $at = $this->bytes_already_parsed; + + // Find the next dot in the text + $dot_at = strpos( $this->text, '.', $at ); + + // If there's no dot, assume there's no URL + if ( false === $dot_at ) { + return false; + } + + // The shortest tld is 2 characters long + if ( $dot_at + 2 >= strlen( $this->text ) ) { + return false; + } + + $host_bytes_after_dot = strcspn( + $this->text, + self::FORBIDDEN_DOMAIN_BYTES, + $dot_at + 1, + self::CONSIDER_DOMAINS_UP_TO_BYTES + ); + + if ( 0 === $host_bytes_after_dot ) { + return false; + } + + // Lookbehind to capture the rest of the domain name up to a forbidden character. + $host_bytes_before_dot = strcspn( + $this->text_rev, + self::FORBIDDEN_DOMAIN_BYTES, + strlen( $this->text ) - $dot_at - 1, + self::CONSIDER_DOMAINS_UP_TO_BYTES + ); + + $host_starts_at = $dot_at - $host_bytes_before_dot; + + // Capture the protocol, if any + $has_double_slash = false; + if ( $host_starts_at > 2 ) { + if ( '/' === $this->text[ $host_starts_at - 1 ] && '/' === $this->text[ $host_starts_at - 2 ] ) { + $has_double_slash = true; + } + } + + /** + * Look for http or https at the beginning of the URL. + * @TODO: Ensure the character before http or https is a word boundary. 
+ */ + $has_protocol = false; + if ( $has_double_slash && ( + ( + $host_starts_at >= 6 && + 'h' === $this->text[ $host_starts_at - 6 ] && + 't' === $this->text[ $host_starts_at - 5 ] && + 't' === $this->text[ $host_starts_at - 4 ] && + 'p' === $this->text[ $host_starts_at - 3 ] + ) || + ( + $host_starts_at >= 7 && + 'h' === $this->text[ $host_starts_at - 7 ] && + 't' === $this->text[ $host_starts_at - 6 ] && + 't' === $this->text[ $host_starts_at - 5 ] && + 'p' === $this->text[ $host_starts_at - 4 ] && + 's' === $this->text[ $host_starts_at - 3 ] + ) + ) ) { + $has_protocol = true; + } + + // Move the pointer to the end of the host + $at = $dot_at + $host_bytes_after_dot; + } + + +} + diff --git a/packages/playground/data-liberation/src/WP_URL.php b/packages/playground/data-liberation/src/WP_URL.php new file mode 100644 index 00000000000..60d9cc61ae8 --- /dev/null +++ b/packages/playground/data-liberation/src/WP_URL.php @@ -0,0 +1,66 @@ + 2 && + '/' === $raw_url[0] && + '/' === $raw_url[1] + ) || ( + strlen( $raw_url ) > 7 && + 'h' === $raw_url[0] && + 't' === $raw_url[1] && + 't' === $raw_url[2] && + 'p' === $raw_url[3] && + ':' === $raw_url[4] && + '/' === $raw_url[5] && + '/' === $raw_url[6] + ) || ( + strlen( $raw_url ) > 8 && + 'h' === $raw_url[0] && + 't' === $raw_url[1] && + 't' === $raw_url[2] && + 'p' === $raw_url[3] && + 's' === $raw_url[4] && + ':' === $raw_url[5] && + '/' === $raw_url[6] && + '/' === $raw_url[7] + ) + ); + } + +} diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php new file mode 100644 index 00000000000..1cec4b3417c --- /dev/null +++ b/packages/playground/data-liberation/src/functions.php @@ -0,0 +1,294 @@ + wp_rewrite_urls('') + * + * ``` + * + * @TODO Use a proper JSON parser and encoder to: + * * Support UTF-16 characters + * * Gracefully handle recoverable encoding issues + * * Avoid changing the whitespace in the same manner as + * we do in WP_HTML_Tag_Processor 
/**
 * Rewrites the URLs of the current site found in the block markup so that
 * they point to the new site.
 *
 * Only the protocol, the hostname and, when the current site lives under
 * a sub-path, the pathname prefix are rewritten.
 *
 * @TODO Use a proper JSON parser and encoder to:
 *       * Support UTF-16 characters
 *       * Gracefully handle recoverable encoding issues
 *       * Avoid changing the whitespace in the same manner as
 *         we do in WP_HTML_Tag_Processor
 *
 * @param array $options {
 *     @type string $block_markup     The markup to process.
 *     @type string $current-site-url The URL of the site the markup comes from.
 *     @type string $new-site-url     The URL of the site the markup is moved to.
 *     @type string $base_url         Optional. Defaults to `current-site-url`.
 * }
 * @return mixed Whatever WP_Block_Markup_Url_Processor::get_updated_html() returns.
 */
function wp_rewrite_urls( $options ) {
	if ( empty( $options['base_url'] ) ) {
		$options['base_url'] = $options['current-site-url'];
	}

	$parsed_new_site_url     = WP_URL::parse( $options['new-site-url'] );
	$parsed_current_site_url = WP_URL::parse( $options['current-site-url'] );

	$processor = new WP_Block_Markup_Url_Processor( $options['block_markup'], $options['base_url'] );
	foreach ( iterate_urls( $processor, $options['current-site-url'] ) as $p ) {
		$parsed_matched_url = $p->get_parsed_url();
		// Let's rewrite the URL
		$parsed_matched_url->protocol = $parsed_new_site_url->protocol;
		$parsed_matched_url->hostname = $parsed_new_site_url->hostname;
		$decoded_matched_pathname     = urldecode( $parsed_matched_url->pathname );

		// Short-circuit for empty pathnames
		if ( '/' !== $parsed_current_site_url->pathname ) {
			$parsed_matched_url->pathname =
				$parsed_new_site_url->pathname .
				substr(
					$decoded_matched_pathname,
					// @TODO: Why is + 1 needed to avoid a double slash in the pathname?
					strlen( urldecode( $parsed_current_site_url->pathname ) ) + 1
				);
		}

		/*
		 * Stylistic choice – if the matched URL has no trailing slash,
		 * do not add it to the new URL. The WHATWG URL parser will
		 * add one automatically if the path is empty, so we have to
		 * explicitly remove it.
		 */
		$new_raw_url     = $parsed_matched_url->toString();
		$raw_matched_url = (string) $p->get_raw_url();
		if (
			// str_ends_with() is safe for an empty raw URL, unlike reading the last byte by offset.
			! str_ends_with( $raw_matched_url, '/' ) &&
			'/' === $parsed_matched_url->pathname &&
			'' === $parsed_matched_url->search &&
			'' === $parsed_matched_url->hash
		) {
			$new_raw_url = rtrim( $new_raw_url, '/' );
		}
		if ( $new_raw_url ) {
			$p->set_raw_url( $new_raw_url );
		}
	}

	return $processor->get_updated_html();
}

/**
 * Yields the processor each time it is positioned on a URL that belongs
 * to the current site.
 *
 * A URL belongs to the current site when its hostname equals the current
 * site's hostname and its URL-decoded pathname starts with the current
 * site's URL-decoded pathname. NOTE(review): this is a plain string prefix
 * check, so a site at `/blog` presumably also matches `/blogger` – confirm
 * whether that is intended.
 *
 * @param WP_Block_Markup_Url_Processor $p                The processor to advance.
 * @param string                        $current_site_url The URL of the current site.
 * @return Generator Yields $p itself, positioned on a matching URL.
 */
function iterate_urls( $p, $current_site_url ) {
	$parsed_current_site_url       = WP_URL::parse( $current_site_url );
	$decoded_current_site_pathname = urldecode( $parsed_current_site_url->pathname );

	while ( $p->next_url() ) {
		$parsed_matched_url = $p->get_parsed_url();
		if ( $parsed_matched_url->hostname !== $parsed_current_site_url->hostname ) {
			continue;
		}

		$decoded_matched_pathname = urldecode( $parsed_matched_url->pathname );
		if ( ! str_starts_with( $decoded_matched_pathname, $decoded_current_site_pathname ) ) {
			continue;
		}

		// It's a match!
		yield $p;
	}

	return $p->get_updated_html();
}

/**
 * Prints every non-empty URL found in the block markup, one per line,
 * together with the place it was found in.
 *
 * @param array $options {
 *     @type string $block_markup The markup to scan.
 *     @type string $base_url     Optional. Defaults to https://playground.internal.
 * }
 */
function wp_list_urls_in_block_markup( $options ) {
	$block_markup = $options['block_markup'];
	$base_url     = $options['base_url'] ?? 'https://playground.internal';
	$p            = new WP_Block_Markup_Url_Processor( $block_markup, $base_url );
	while ( $p->next_url() ) {
		// Skip empty relative URLs.
		if ( ! trim( $p->get_raw_url() ) ) {
			continue;
		}
		echo '* ';
		switch ( $p->get_token_type() ) {
			case '#tag':
				echo 'In <' . $p->get_tag() . '> tag attribute "' . $p->get_inspected_attribute_name() . '": ';
				break;
			case '#block-comment':
				echo 'In a ' . $p->get_block_name() . ' block attribute "' . $p->get_block_attribute_key() . '": ';
				break;
			case '#text':
				echo 'In #text: ';
				break;
		}
		echo $p->get_raw_url() . "\n";
	}
}

/**
 * Migrating assets, network operations.
 */

/**
 * Downloads the assets referenced in the block markup and, when all the
 * downloads succeed, rewrites the URLs to point to the new site.
 *
 * @param array $options See wp_frontload_assets() and wp_rewrite_urls().
 * @return mixed The result of wp_rewrite_urls(), or the array of failed
 *               downloads returned by wp_frontload_assets().
 */
function wp_migrate_post_content_urls( $options ) {
	$result = wp_frontload_assets( $options );
	if ( true !== $result ) {
		return $result;
	}

	return wp_rewrite_urls( $options );
}

/**
 * Downloads the files that the current-site URLs in the block markup point to.
 *
 * A URL is treated as an asset when its normalized path contains a dot
 * and does not have the `php` extension.
 *
 * @param array $options {
 *     @type string $block_markup      The markup to scan.
 *     @type string $current-site-url  The URL of the site the markup comes from.
 *     @type string $local-assets-path The directory to store the assets in.
 *     @type string $base_url          Optional. Defaults to `current-site-url`.
 * }
 * @return true|array True on success, or the failed entries of the
 *                    wp_download_files() result.
 */
function wp_frontload_assets( $options ) {
	/*
	 * Same default as in wp_rewrite_urls(). This function runs first in
	 * wp_migrate_post_content_urls(), so it cannot rely on that default.
	 */
	if ( empty( $options['base_url'] ) ) {
		$options['base_url'] = $options['current-site-url'];
	}
	$local_assets_path = $options['local-assets-path'];

	$processor = new WP_Block_Markup_Url_Processor( $options['block_markup'], $options['base_url'] );

	$assets_urls = [];
	foreach ( iterate_urls( $processor, $options['current-site-url'] ) as $p ) {
		$parsed_matched_url = $p->get_parsed_url();
		// @TODO use an actual decoder, not PHP's urldecode
		$pathname = urldecode( $parsed_matched_url->pathname );
		$pathname = ltrim( normalize_path( $pathname ), '/' );
		if ( ! str_contains( $pathname, '.' ) ) {
			continue;
		}
		// Decide whether to skip before touching the filesystem.
		if ( 'php' === pathinfo( $pathname, PATHINFO_EXTENSION ) ) {
			continue;
		}

		$local_path      = join_paths( $local_assets_path, $pathname );
		$local_directory = dirname( $local_path );
		if ( ! is_dir( $local_directory ) ) {
			mkdir( $local_directory, 0777, true );
		}

		$assets_urls[ $parsed_matched_url->href ] = $local_path;
	}

	$download_results = wp_download_files(
		[
			'concurrency' => 10,
			'assets'      => $assets_urls,
		]
	);

	$failures = array_filter(
		$download_results,
		function ( $result ) {
			return ! $result['success'];
		}
	);
	if ( ! empty( $failures ) ) {
		return $failures;
	}

	return true;
}
/**
 * Downloads the assets from the given URLs to the local paths.
 * Only downloads the files that are not already present locally.
 *
 * For now it uses curl. Before merging to WordPress core we should switch to
 * https://github.com/WordPress/blueprints-library/blob/trunk/src/WordPress/AsyncHttp/Client.php.
 * To use in Playground, we'll need to delegate to `fetch()` until we support
 * curl in the browser.
 *
 * NOTE(review): a file left behind by an interrupted process is treated as
 * already present – confirm whether a stronger completeness check is needed.
 *
 * @param array $options {
 *     @type int|null $concurrency How many concurrent downloads to run. Defaults to 10.
 *     @type array    $assets      An array of { remote URL => local path }.
 * }
 * @return array An array of { remote URL => { success => boolean, error => mixed } }.
 *               The `error` key is only present for failed downloads.
 */
function wp_download_files( $options ) {
	$results     = [];
	$requests    = [];
	$local_paths = [];
	foreach ( $options['assets'] as $asset_url => $local_file ) {
		/*
		 * Body chunks are appended to the local file, so downloading into
		 * an existing file would corrupt it. Skip it, as documented.
		 */
		if ( file_exists( $local_file ) ) {
			$results[ $asset_url ] = [
				'success' => true,
			];
			continue;
		}
		$request                     = new Request( $asset_url );
		$requests[]                  = $request;
		$local_paths[ $request->id ] = $local_file;
	}

	// Nothing to download – do not spin up the HTTP client at all.
	if ( empty( $requests ) ) {
		return $results;
	}

	$client = new Client(
		[
			'concurrency' => $options['concurrency'] ?? 10,
		]
	);
	$client->enqueue( $requests );

	while ( $client->await_next_event() ) {
		$request          = $client->get_request();
		$original_request = $request->original_request();

		switch ( $client->get_event() ) {
			case Client::EVENT_BODY_CHUNK_AVAILABLE:
				file_put_contents(
					$local_paths[ $original_request->id ],
					$client->get_response_body_chunk(),
					FILE_APPEND
				);
				break;
			case Client::EVENT_FAILED:
				// Remove the partial file so that the next run retries the download.
				$partial_file = $local_paths[ $original_request->id ];
				if ( is_file( $partial_file ) ) {
					unlink( $partial_file );
				}
				$results[ $original_request->url ] = [
					'success' => false,
					'error'   => $request->error,
				];
				break;
			case Client::EVENT_FINISHED:
				$results[ $original_request->url ] = [
					'success' => true,
				];
				break;
		}
	}

	return $results;
}

/**
 * Resolves the `.` and `..` segments of a path and removes empty segments.
 * Backslashes are treated as directory separators.
 *
 * A `..` that would climb above the root of an absolute path is dropped.
 * In a relative path, leading `..` segments are preserved.
 *
 * @param string $path The path to normalize.
 * @return string The normalized path, without a trailing slash.
 */
function normalize_path( $path ) {
	// Unify the separators first so that the absolute-path check sees them.
	$path        = str_replace( '\\', '/', (string) $path );
	$is_absolute = str_starts_with( $path, '/' );

	$normalized = [];
	foreach ( explode( '/', $path ) as $segment ) {
		if ( '' === $segment || '.' === $segment ) {
			continue;
		}
		if ( '..' !== $segment ) {
			$normalized[] = $segment;
			continue;
		}

		$last_segment = end( $normalized );
		if ( false !== $last_segment && '..' !== $last_segment ) {
			// Go one directory up.
			array_pop( $normalized );
		} elseif ( ! $is_absolute ) {
			// A relative path may climb above its starting point. Keep the `..`.
			$normalized[] = '..';
		}
	}

	return ( $is_absolute ? '/' : '' ) . implode( '/', $normalized );
}

/**
 * Joins path segments with a single slash between them.
 * Backslashes are treated as directory separators and empty segments are
 * skipped. The result is absolute only when the first segment is absolute.
 *
 * @param string ...$paths The segments to join.
 * @return string The joined path.
 */
function join_paths( ...$paths ) {
	$is_absolute = false;
	$segments    = [];
	foreach ( array_values( $paths ) as $index => $path ) {
		$path = str_replace( '\\', '/', (string) $path );
		if ( 0 === $index ) {
			$is_absolute = str_starts_with( $path, '/' );
		}

		$path = trim( $path, '/' );
		if ( '' !== $path ) {
			$segments[] = $path;
		}
	}

	return ( $is_absolute ? '/' : '' ) . implode( '/', $segments );
}
'de' => 1, + 'dj' => 1, + 'dk' => 1, + 'dm' => 1, + 'do' => 1, + 'dz' => 1, + 'ec' => 1, + 'edu' => 1, + 'ee' => 1, + 'eg' => 1, + 'es' => 1, + 'et' => 1, + 'eu' => 1, + 'fi' => 1, + 'fj' => 1, + 'fm' => 1, + 'fo' => 1, + 'fr' => 1, + 'ga' => 1, + 'gb' => 1, + 'gd' => 1, + 'ge' => 1, + 'gf' => 1, + 'gg' => 1, + 'gh' => 1, + 'gi' => 1, + 'gl' => 1, + 'gm' => 1, + 'gn' => 1, + 'gov' => 1, + 'gp' => 1, + 'gq' => 1, + 'gr' => 1, + 'gs' => 1, + 'gt' => 1, + 'gu' => 1, + 'gw' => 1, + 'gy' => 1, + 'hk' => 1, + 'hm' => 1, + 'hn' => 1, + 'hr' => 1, + 'ht' => 1, + 'hu' => 1, + 'id' => 1, + 'ie' => 1, + 'il' => 1, + 'ישראל' => 1, + 'im' => 1, + 'in' => 1, + 'info' => 1, + 'int' => 1, + 'io' => 1, + 'iq' => 1, + 'ir' => 1, + 'is' => 1, + 'it' => 1, + 'je' => 1, + 'jo' => 1, + 'jobs' => 1, + 'jp' => 1, + 'ke' => 1, + 'kg' => 1, + 'ki' => 1, + 'km' => 1, + 'kn' => 1, + 'kp' => 1, + 'kr' => 1, + 'kw' => 1, + 'ky' => 1, + 'kz' => 1, + 'la' => 1, + 'lb' => 1, + 'lc' => 1, + 'li' => 1, + 'lk' => 1, + 'lr' => 1, + 'ls' => 1, + 'lt' => 1, + 'lu' => 1, + 'lv' => 1, + 'ly' => 1, + 'ma' => 1, + 'mc' => 1, + 'md' => 1, + 'me' => 1, + 'mg' => 1, + 'mh' => 1, + 'mil' => 1, + 'mk' => 1, + 'ml' => 1, + 'mn' => 1, + 'mo' => 1, + 'mobi' => 1, + 'mp' => 1, + 'mq' => 1, + 'mr' => 1, + 'ms' => 1, + 'mt' => 1, + 'mu' => 1, + 'museum' => 1, + 'mv' => 1, + 'mw' => 1, + 'mx' => 1, + 'my' => 1, + 'mz' => 1, + 'na' => 1, + 'name' => 1, + 'nc' => 1, + 'ne' => 1, + 'net' => 1, + 'nf' => 1, + 'ng' => 1, + 'ni' => 1, + 'nl' => 1, + 'no' => 1, + 'nr' => 1, + 'nu' => 1, + 'nz' => 1, + 'om' => 1, + 'onion' => 1, + 'org' => 1, + 'pa' => 1, + 'pe' => 1, + 'pf' => 1, + 'ph' => 1, + 'pk' => 1, + 'pl' => 1, + 'pm' => 1, + 'pn' => 1, + 'post' => 1, + 'pr' => 1, + 'pro' => 1, + 'ps' => 1, + 'pt' => 1, + 'pw' => 1, + 'py' => 1, + 'qa' => 1, + 're' => 1, + 'ro' => 1, + 'rs' => 1, + 'ru' => 1, + 'rw' => 1, + 'sa' => 1, + 'sb' => 1, + 'sc' => 1, + 'sd' => 1, + 'se' => 1, + 'sg' => 1, + 'sh' => 1, + 'si' => 1, + 'sj' => 
1, + 'sk' => 1, + 'sl' => 1, + 'sm' => 1, + 'sn' => 1, + 'so' => 1, + 'sr' => 1, + 'ss' => 1, + 'st' => 1, + 'su' => 1, + 'sv' => 1, + 'sx' => 1, + 'sy' => 1, + 'sz' => 1, + 'tc' => 1, + 'td' => 1, + 'tel' => 1, + 'tf' => 1, + 'tg' => 1, + 'th' => 1, + 'tj' => 1, + 'tk' => 1, + 'tl' => 1, + 'tm' => 1, + 'tn' => 1, + 'to' => 1, + 'tr' => 1, + 'tt' => 1, + 'tv' => 1, + 'tw' => 1, + 'tz' => 1, + 'ua' => 1, + 'ug' => 1, + 'uk' => 1, + 'us' => 1, + 'uy' => 1, + 'uz' => 1, + 'va' => 1, + 'vc' => 1, + 've' => 1, + 'vg' => 1, + 'vi' => 1, + 'vn' => 1, + 'vu' => 1, + 'wf' => 1, + 'ws' => 1, + 'yt' => 1, + 'امارات' => 1, + 'հայ' => 1, + 'বাংলা' => 1, + 'бг' => 1, + 'البحرين' => 1, + 'бел' => 1, + '中国' => 1, + '中國' => 1, + 'الجزائر' => 1, + 'مصر' => 1, + 'ею' => 1, + 'ευ' => 1, + 'موريتانيا' => 1, + 'გე' => 1, + 'ελ' => 1, + '香港' => 1, + 'ಭಾರತ' => 1, + 'ଭାରତ' => 1, + 'ভাৰত' => 1, + 'भारतम्' => 1, + 'भारोत' => 1, + 'ڀارت' => 1, + 'ഭാരതം' => 1, + 'भारत' => 1, + 'بارت' => 1, + 'بھارت' => 1, + 'భారత్' => 1, + 'ભારત' => 1, + 'ਭਾਰਤ' => 1, + 'ভারত' => 1, + 'இந்தியா' => 1, + 'ایران' => 1, + 'ايران' => 1, + 'عراق' => 1, + 'الاردن' => 1, + '한국' => 1, + 'қаз' => 1, + 'ລາວ' => 1, + 'ලංකා' => 1, + 'இலங்கை' => 1, + 'المغرب' => 1, + 'мкд' => 1, + 'мон' => 1, + '澳門' => 1, + '澳门' => 1, + 'مليسيا' => 1, + 'عمان' => 1, + 'پاکستان' => 1, + 'پاكستان' => 1, + 'فلسطين' => 1, + 'срб' => 1, + 'рф' => 1, + 'قطر' => 1, + 'السعودية' => 1, + 'السعودیة' => 1, + 'السعودیۃ' => 1, + 'السعوديه' => 1, + 'سودان' => 1, + '新加坡' => 1, + 'சிங்கப்பூர்' => 1, + 'سورية' => 1, + 'سوريا' => 1, + 'ไทย' => 1, + 'تونس' => 1, + '台灣' => 1, + '台湾' => 1, + '臺灣' => 1, + 'укр' => 1, + 'اليمن' => 1, + 'xxx' => 1, + 'ye' => 1, + 'zm' => 1, + 'zw' => 1, + 'aaa' => 1, + 'aarp' => 1, + 'abb' => 1, + 'abbott' => 1, + 'abbvie' => 1, + 'abc' => 1, + 'able' => 1, + 'abogado' => 1, + 'abudhabi' => 1, + 'academy' => 1, + 'accenture' => 1, + 'accountant' => 1, + 'accountants' => 1, + 'aco' => 1, + 'actor' => 1, + 'ads' => 1, + 'adult' => 1, 
+ 'aeg' => 1, + 'aetna' => 1, + 'afl' => 1, + 'africa' => 1, + 'agakhan' => 1, + 'agency' => 1, + 'aig' => 1, + 'airbus' => 1, + 'airforce' => 1, + 'airtel' => 1, + 'akdn' => 1, + 'alibaba' => 1, + 'alipay' => 1, + 'allfinanz' => 1, + 'allstate' => 1, + 'ally' => 1, + 'alsace' => 1, + 'alstom' => 1, + 'amazon' => 1, + 'americanexpress' => 1, + 'americanfamily' => 1, + 'amex' => 1, + 'amfam' => 1, + 'amica' => 1, + 'amsterdam' => 1, + 'analytics' => 1, + 'android' => 1, + 'anquan' => 1, + 'anz' => 1, + 'aol' => 1, + 'apartments' => 1, + 'app' => 1, + 'apple' => 1, + 'aquarelle' => 1, + 'arab' => 1, + 'aramco' => 1, + 'archi' => 1, + 'army' => 1, + 'art' => 1, + 'arte' => 1, + 'asda' => 1, + 'associates' => 1, + 'athleta' => 1, + 'attorney' => 1, + 'auction' => 1, + 'audi' => 1, + 'audible' => 1, + 'audio' => 1, + 'auspost' => 1, + 'author' => 1, + 'auto' => 1, + 'autos' => 1, + 'aws' => 1, + 'axa' => 1, + 'azure' => 1, + 'baby' => 1, + 'baidu' => 1, + 'banamex' => 1, + 'band' => 1, + 'bank' => 1, + 'bar' => 1, + 'barcelona' => 1, + 'barclaycard' => 1, + 'barclays' => 1, + 'barefoot' => 1, + 'bargains' => 1, + 'baseball' => 1, + 'basketball' => 1, + 'bauhaus' => 1, + 'bayern' => 1, + 'bbc' => 1, + 'bbt' => 1, + 'bbva' => 1, + 'bcg' => 1, + 'bcn' => 1, + 'beats' => 1, + 'beauty' => 1, + 'beer' => 1, + 'bentley' => 1, + 'berlin' => 1, + 'best' => 1, + 'bestbuy' => 1, + 'bet' => 1, + 'bharti' => 1, + 'bible' => 1, + 'bid' => 1, + 'bike' => 1, + 'bing' => 1, + 'bingo' => 1, + 'bio' => 1, + 'black' => 1, + 'blackfriday' => 1, + 'blockbuster' => 1, + 'blog' => 1, + 'bloomberg' => 1, + 'blue' => 1, + 'bms' => 1, + 'bmw' => 1, + 'bnpparibas' => 1, + 'boats' => 1, + 'boehringer' => 1, + 'bofa' => 1, + 'bom' => 1, + 'bond' => 1, + 'boo' => 1, + 'book' => 1, + 'booking' => 1, + 'bosch' => 1, + 'bostik' => 1, + 'boston' => 1, + 'bot' => 1, + 'boutique' => 1, + 'box' => 1, + 'bradesco' => 1, + 'bridgestone' => 1, + 'broadway' => 1, + 'broker' => 1, + 'brother' => 1, + 'brussels' 
=> 1, + 'build' => 1, + 'builders' => 1, + 'business' => 1, + 'buy' => 1, + 'buzz' => 1, + 'bzh' => 1, + 'cab' => 1, + 'cafe' => 1, + 'cal' => 1, + 'call' => 1, + 'calvinklein' => 1, + 'cam' => 1, + 'camera' => 1, + 'camp' => 1, + 'canon' => 1, + 'capetown' => 1, + 'capital' => 1, + 'capitalone' => 1, + 'car' => 1, + 'caravan' => 1, + 'cards' => 1, + 'care' => 1, + 'career' => 1, + 'careers' => 1, + 'cars' => 1, + 'casa' => 1, + 'case' => 1, + 'cash' => 1, + 'casino' => 1, + 'catering' => 1, + 'catholic' => 1, + 'cba' => 1, + 'cbn' => 1, + 'cbre' => 1, + 'center' => 1, + 'ceo' => 1, + 'cern' => 1, + 'cfa' => 1, + 'cfd' => 1, + 'chanel' => 1, + 'channel' => 1, + 'charity' => 1, + 'chase' => 1, + 'chat' => 1, + 'cheap' => 1, + 'chintai' => 1, + 'christmas' => 1, + 'chrome' => 1, + 'church' => 1, + 'cipriani' => 1, + 'circle' => 1, + 'cisco' => 1, + 'citadel' => 1, + 'citi' => 1, + 'citic' => 1, + 'city' => 1, + 'claims' => 1, + 'cleaning' => 1, + 'click' => 1, + 'clinic' => 1, + 'clinique' => 1, + 'clothing' => 1, + 'cloud' => 1, + 'club' => 1, + 'clubmed' => 1, + 'coach' => 1, + 'codes' => 1, + 'coffee' => 1, + 'college' => 1, + 'cologne' => 1, + 'commbank' => 1, + 'community' => 1, + 'company' => 1, + 'compare' => 1, + 'computer' => 1, + 'comsec' => 1, + 'condos' => 1, + 'construction' => 1, + 'consulting' => 1, + 'contact' => 1, + 'contractors' => 1, + 'cooking' => 1, + 'cool' => 1, + 'corsica' => 1, + 'country' => 1, + 'coupon' => 1, + 'coupons' => 1, + 'courses' => 1, + 'cpa' => 1, + 'credit' => 1, + 'creditcard' => 1, + 'creditunion' => 1, + 'cricket' => 1, + 'crown' => 1, + 'crs' => 1, + 'cruise' => 1, + 'cruises' => 1, + 'cuisinella' => 1, + 'cymru' => 1, + 'cyou' => 1, + 'dabur' => 1, + 'dad' => 1, + 'dance' => 1, + 'data' => 1, + 'date' => 1, + 'dating' => 1, + 'datsun' => 1, + 'day' => 1, + 'dclk' => 1, + 'dds' => 1, + 'deal' => 1, + 'dealer' => 1, + 'deals' => 1, + 'degree' => 1, + 'delivery' => 1, + 'dell' => 1, + 'deloitte' => 1, + 'delta' => 1, + 
'democrat' => 1, + 'dental' => 1, + 'dentist' => 1, + 'desi' => 1, + 'design' => 1, + 'dev' => 1, + 'dhl' => 1, + 'diamonds' => 1, + 'diet' => 1, + 'digital' => 1, + 'direct' => 1, + 'directory' => 1, + 'discount' => 1, + 'discover' => 1, + 'dish' => 1, + 'diy' => 1, + 'dnp' => 1, + 'docs' => 1, + 'doctor' => 1, + 'dog' => 1, + 'domains' => 1, + 'dot' => 1, + 'download' => 1, + 'drive' => 1, + 'dtv' => 1, + 'dubai' => 1, + 'dunlop' => 1, + 'dupont' => 1, + 'durban' => 1, + 'dvag' => 1, + 'dvr' => 1, + 'earth' => 1, + 'eat' => 1, + 'eco' => 1, + 'edeka' => 1, + 'education' => 1, + 'email' => 1, + 'emerck' => 1, + 'energy' => 1, + 'engineer' => 1, + 'engineering' => 1, + 'enterprises' => 1, + 'epson' => 1, + 'equipment' => 1, + 'ericsson' => 1, + 'erni' => 1, + 'esq' => 1, + 'estate' => 1, + 'eurovision' => 1, + 'eus' => 1, + 'events' => 1, + 'exchange' => 1, + 'expert' => 1, + 'exposed' => 1, + 'express' => 1, + 'extraspace' => 1, + 'fage' => 1, + 'fail' => 1, + 'fairwinds' => 1, + 'faith' => 1, + 'family' => 1, + 'fan' => 1, + 'fans' => 1, + 'farm' => 1, + 'farmers' => 1, + 'fashion' => 1, + 'fast' => 1, + 'fedex' => 1, + 'feedback' => 1, + 'ferrari' => 1, + 'ferrero' => 1, + 'fidelity' => 1, + 'fido' => 1, + 'film' => 1, + 'final' => 1, + 'finance' => 1, + 'financial' => 1, + 'fire' => 1, + 'firestone' => 1, + 'firmdale' => 1, + 'fish' => 1, + 'fishing' => 1, + 'fit' => 1, + 'fitness' => 1, + 'flickr' => 1, + 'flights' => 1, + 'flir' => 1, + 'florist' => 1, + 'flowers' => 1, + 'fly' => 1, + 'foo' => 1, + 'food' => 1, + 'football' => 1, + 'ford' => 1, + 'forex' => 1, + 'forsale' => 1, + 'forum' => 1, + 'foundation' => 1, + 'fox' => 1, + 'free' => 1, + 'fresenius' => 1, + 'frl' => 1, + 'frogans' => 1, + 'frontier' => 1, + 'ftr' => 1, + 'fujitsu' => 1, + 'fun' => 1, + 'fund' => 1, + 'furniture' => 1, + 'futbol' => 1, + 'fyi' => 1, + 'gal' => 1, + 'gallery' => 1, + 'gallo' => 1, + 'gallup' => 1, + 'game' => 1, + 'games' => 1, + 'gap' => 1, + 'garden' => 1, + 'gay' => 
1, + 'gbiz' => 1, + 'gdn' => 1, + 'gea' => 1, + 'gent' => 1, + 'genting' => 1, + 'george' => 1, + 'ggee' => 1, + 'gift' => 1, + 'gifts' => 1, + 'gives' => 1, + 'giving' => 1, + 'glass' => 1, + 'gle' => 1, + 'global' => 1, + 'globo' => 1, + 'gmail' => 1, + 'gmbh' => 1, + 'gmo' => 1, + 'gmx' => 1, + 'godaddy' => 1, + 'gold' => 1, + 'goldpoint' => 1, + 'golf' => 1, + 'goo' => 1, + 'goodyear' => 1, + 'goog' => 1, + 'google' => 1, + 'gop' => 1, + 'got' => 1, + 'grainger' => 1, + 'graphics' => 1, + 'gratis' => 1, + 'green' => 1, + 'gripe' => 1, + 'grocery' => 1, + 'group' => 1, + 'gucci' => 1, + 'guge' => 1, + 'guide' => 1, + 'guitars' => 1, + 'guru' => 1, + 'hair' => 1, + 'hamburg' => 1, + 'hangout' => 1, + 'haus' => 1, + 'hbo' => 1, + 'hdfc' => 1, + 'hdfcbank' => 1, + 'health' => 1, + 'healthcare' => 1, + 'help' => 1, + 'helsinki' => 1, + 'here' => 1, + 'hermes' => 1, + 'hiphop' => 1, + 'hisamitsu' => 1, + 'hitachi' => 1, + 'hiv' => 1, + 'hkt' => 1, + 'hockey' => 1, + 'holdings' => 1, + 'holiday' => 1, + 'homedepot' => 1, + 'homegoods' => 1, + 'homes' => 1, + 'homesense' => 1, + 'honda' => 1, + 'horse' => 1, + 'hospital' => 1, + 'host' => 1, + 'hosting' => 1, + 'hot' => 1, + 'hotels' => 1, + 'hotmail' => 1, + 'house' => 1, + 'how' => 1, + 'hsbc' => 1, + 'hughes' => 1, + 'hyatt' => 1, + 'hyundai' => 1, + 'ibm' => 1, + 'icbc' => 1, + 'ice' => 1, + 'icu' => 1, + 'ieee' => 1, + 'ifm' => 1, + 'ikano' => 1, + 'imamat' => 1, + 'imdb' => 1, + 'immo' => 1, + 'immobilien' => 1, + 'inc' => 1, + 'industries' => 1, + 'infiniti' => 1, + 'ing' => 1, + 'ink' => 1, + 'institute' => 1, + 'insurance' => 1, + 'insure' => 1, + 'international' => 1, + 'intuit' => 1, + 'investments' => 1, + 'ipiranga' => 1, + 'irish' => 1, + 'ismaili' => 1, + 'ist' => 1, + 'istanbul' => 1, + 'itau' => 1, + 'itv' => 1, + 'jaguar' => 1, + 'java' => 1, + 'jcb' => 1, + 'jeep' => 1, + 'jetzt' => 1, + 'jewelry' => 1, + 'jio' => 1, + 'jll' => 1, + 'jmp' => 1, + 'jnj' => 1, + 'joburg' => 1, + 'jot' => 1, + 'joy' => 
1, + 'jpmorgan' => 1, + 'jprs' => 1, + 'juegos' => 1, + 'juniper' => 1, + 'kaufen' => 1, + 'kddi' => 1, + 'kerryhotels' => 1, + 'kerrylogistics' => 1, + 'kerryproperties' => 1, + 'kfh' => 1, + 'kia' => 1, + 'kids' => 1, + 'kim' => 1, + 'kindle' => 1, + 'kitchen' => 1, + 'kiwi' => 1, + 'koeln' => 1, + 'komatsu' => 1, + 'kosher' => 1, + 'kpmg' => 1, + 'kpn' => 1, + 'krd' => 1, + 'kred' => 1, + 'kuokgroup' => 1, + 'kyoto' => 1, + 'lacaixa' => 1, + 'lamborghini' => 1, + 'lamer' => 1, + 'lancaster' => 1, + 'land' => 1, + 'landrover' => 1, + 'lanxess' => 1, + 'lasalle' => 1, + 'lat' => 1, + 'latino' => 1, + 'latrobe' => 1, + 'law' => 1, + 'lawyer' => 1, + 'lds' => 1, + 'lease' => 1, + 'leclerc' => 1, + 'lefrak' => 1, + 'legal' => 1, + 'lego' => 1, + 'lexus' => 1, + 'lgbt' => 1, + 'lidl' => 1, + 'life' => 1, + 'lifeinsurance' => 1, + 'lifestyle' => 1, + 'lighting' => 1, + 'like' => 1, + 'lilly' => 1, + 'limited' => 1, + 'limo' => 1, + 'lincoln' => 1, + 'link' => 1, + 'lipsy' => 1, + 'live' => 1, + 'living' => 1, + 'llc' => 1, + 'llp' => 1, + 'loan' => 1, + 'loans' => 1, + 'locker' => 1, + 'locus' => 1, + 'lol' => 1, + 'london' => 1, + 'lotte' => 1, + 'lotto' => 1, + 'love' => 1, + 'lpl' => 1, + 'lplfinancial' => 1, + 'ltd' => 1, + 'ltda' => 1, + 'lundbeck' => 1, + 'luxe' => 1, + 'luxury' => 1, + 'madrid' => 1, + 'maif' => 1, + 'maison' => 1, + 'makeup' => 1, + 'man' => 1, + 'management' => 1, + 'mango' => 1, + 'map' => 1, + 'market' => 1, + 'marketing' => 1, + 'markets' => 1, + 'marriott' => 1, + 'marshalls' => 1, + 'mattel' => 1, + 'mba' => 1, + 'mckinsey' => 1, + 'med' => 1, + 'media' => 1, + 'meet' => 1, + 'melbourne' => 1, + 'meme' => 1, + 'memorial' => 1, + 'men' => 1, + 'menu' => 1, + 'merckmsd' => 1, + 'miami' => 1, + 'microsoft' => 1, + 'mini' => 1, + 'mint' => 1, + 'mit' => 1, + 'mitsubishi' => 1, + 'mlb' => 1, + 'mls' => 1, + 'mma' => 1, + 'mobile' => 1, + 'moda' => 1, + 'moe' => 1, + 'moi' => 1, + 'mom' => 1, + 'monash' => 1, + 'money' => 1, + 'monster' => 1, + 
'mormon' => 1, + 'mortgage' => 1, + 'moscow' => 1, + 'moto' => 1, + 'motorcycles' => 1, + 'mov' => 1, + 'movie' => 1, + 'msd' => 1, + 'mtn' => 1, + 'mtr' => 1, + 'music' => 1, + 'nab' => 1, + 'nagoya' => 1, + 'navy' => 1, + 'nba' => 1, + 'nec' => 1, + 'netbank' => 1, + 'netflix' => 1, + 'network' => 1, + 'neustar' => 1, + 'new' => 1, + 'news' => 1, + 'next' => 1, + 'nextdirect' => 1, + 'nexus' => 1, + 'nfl' => 1, + 'ngo' => 1, + 'nhk' => 1, + 'nico' => 1, + 'nike' => 1, + 'nikon' => 1, + 'ninja' => 1, + 'nissan' => 1, + 'nissay' => 1, + 'nokia' => 1, + 'norton' => 1, + 'now' => 1, + 'nowruz' => 1, + 'nowtv' => 1, + 'nra' => 1, + 'nrw' => 1, + 'ntt' => 1, + 'nyc' => 1, + 'obi' => 1, + 'observer' => 1, + 'office' => 1, + 'okinawa' => 1, + 'olayan' => 1, + 'olayangroup' => 1, + 'ollo' => 1, + 'omega' => 1, + 'one' => 1, + 'ong' => 1, + 'onl' => 1, + 'online' => 1, + 'ooo' => 1, + 'open' => 1, + 'oracle' => 1, + 'orange' => 1, + 'organic' => 1, + 'origins' => 1, + 'osaka' => 1, + 'otsuka' => 1, + 'ott' => 1, + 'ovh' => 1, + 'page' => 1, + 'panasonic' => 1, + 'paris' => 1, + 'pars' => 1, + 'partners' => 1, + 'parts' => 1, + 'party' => 1, + 'pay' => 1, + 'pccw' => 1, + 'pet' => 1, + 'pfizer' => 1, + 'pharmacy' => 1, + 'phd' => 1, + 'philips' => 1, + 'phone' => 1, + 'photo' => 1, + 'photography' => 1, + 'photos' => 1, + 'physio' => 1, + 'pics' => 1, + 'pictet' => 1, + 'pictures' => 1, + 'pid' => 1, + 'pin' => 1, + 'ping' => 1, + 'pink' => 1, + 'pioneer' => 1, + 'pizza' => 1, + 'place' => 1, + 'play' => 1, + 'playstation' => 1, + 'plumbing' => 1, + 'plus' => 1, + 'pnc' => 1, + 'pohl' => 1, + 'poker' => 1, + 'politie' => 1, + 'porn' => 1, + 'pramerica' => 1, + 'praxi' => 1, + 'press' => 1, + 'prime' => 1, + 'prod' => 1, + 'productions' => 1, + 'prof' => 1, + 'progressive' => 1, + 'promo' => 1, + 'properties' => 1, + 'property' => 1, + 'protection' => 1, + 'pru' => 1, + 'prudential' => 1, + 'pub' => 1, + 'pwc' => 1, + 'qpon' => 1, + 'quebec' => 1, + 'quest' => 1, + 'racing' 
=> 1, + 'radio' => 1, + 'read' => 1, + 'realestate' => 1, + 'realtor' => 1, + 'realty' => 1, + 'recipes' => 1, + 'red' => 1, + 'redstone' => 1, + 'redumbrella' => 1, + 'rehab' => 1, + 'reise' => 1, + 'reisen' => 1, + 'reit' => 1, + 'reliance' => 1, + 'ren' => 1, + 'rent' => 1, + 'rentals' => 1, + 'repair' => 1, + 'report' => 1, + 'republican' => 1, + 'rest' => 1, + 'restaurant' => 1, + 'review' => 1, + 'reviews' => 1, + 'rexroth' => 1, + 'rich' => 1, + 'richardli' => 1, + 'ricoh' => 1, + 'ril' => 1, + 'rio' => 1, + 'rip' => 1, + 'rocks' => 1, + 'rodeo' => 1, + 'rogers' => 1, + 'room' => 1, + 'rsvp' => 1, + 'rugby' => 1, + 'ruhr' => 1, + 'run' => 1, + 'rwe' => 1, + 'ryukyu' => 1, + 'saarland' => 1, + 'safe' => 1, + 'safety' => 1, + 'sakura' => 1, + 'sale' => 1, + 'salon' => 1, + 'samsclub' => 1, + 'samsung' => 1, + 'sandvik' => 1, + 'sandvikcoromant' => 1, + 'sanofi' => 1, + 'sap' => 1, + 'sarl' => 1, + 'sas' => 1, + 'save' => 1, + 'saxo' => 1, + 'sbi' => 1, + 'sbs' => 1, + 'scb' => 1, + 'schaeffler' => 1, + 'schmidt' => 1, + 'scholarships' => 1, + 'school' => 1, + 'schule' => 1, + 'schwarz' => 1, + 'science' => 1, + 'scot' => 1, + 'search' => 1, + 'seat' => 1, + 'secure' => 1, + 'security' => 1, + 'seek' => 1, + 'select' => 1, + 'sener' => 1, + 'services' => 1, + 'seven' => 1, + 'sew' => 1, + 'sex' => 1, + 'sexy' => 1, + 'sfr' => 1, + 'shangrila' => 1, + 'sharp' => 1, + 'shaw' => 1, + 'shell' => 1, + 'shia' => 1, + 'shiksha' => 1, + 'shoes' => 1, + 'shop' => 1, + 'shopping' => 1, + 'shouji' => 1, + 'show' => 1, + 'silk' => 1, + 'sina' => 1, + 'singles' => 1, + 'site' => 1, + 'ski' => 1, + 'skin' => 1, + 'sky' => 1, + 'skype' => 1, + 'sling' => 1, + 'smart' => 1, + 'smile' => 1, + 'sncf' => 1, + 'soccer' => 1, + 'social' => 1, + 'softbank' => 1, + 'software' => 1, + 'sohu' => 1, + 'solar' => 1, + 'solutions' => 1, + 'song' => 1, + 'sony' => 1, + 'soy' => 1, + 'spa' => 1, + 'space' => 1, + 'sport' => 1, + 'spot' => 1, + 'srl' => 1, + 'stada' => 1, + 'staples' => 1, + 
'star' => 1, + 'statebank' => 1, + 'statefarm' => 1, + 'stc' => 1, + 'stcgroup' => 1, + 'stockholm' => 1, + 'storage' => 1, + 'store' => 1, + 'stream' => 1, + 'studio' => 1, + 'study' => 1, + 'style' => 1, + 'sucks' => 1, + 'supplies' => 1, + 'supply' => 1, + 'support' => 1, + 'surf' => 1, + 'surgery' => 1, + 'suzuki' => 1, + 'swatch' => 1, + 'swiss' => 1, + 'sydney' => 1, + 'systems' => 1, + 'tab' => 1, + 'taipei' => 1, + 'talk' => 1, + 'taobao' => 1, + 'target' => 1, + 'tatamotors' => 1, + 'tatar' => 1, + 'tattoo' => 1, + 'tax' => 1, + 'taxi' => 1, + 'tci' => 1, + 'tdk' => 1, + 'team' => 1, + 'tech' => 1, + 'technology' => 1, + 'temasek' => 1, + 'tennis' => 1, + 'teva' => 1, + 'thd' => 1, + 'theater' => 1, + 'theatre' => 1, + 'tiaa' => 1, + 'tickets' => 1, + 'tienda' => 1, + 'tips' => 1, + 'tires' => 1, + 'tirol' => 1, + 'tjmaxx' => 1, + 'tjx' => 1, + 'tkmaxx' => 1, + 'tmall' => 1, + 'today' => 1, + 'tokyo' => 1, + 'tools' => 1, + 'top' => 1, + 'toray' => 1, + 'toshiba' => 1, + 'total' => 1, + 'tours' => 1, + 'town' => 1, + 'toyota' => 1, + 'toys' => 1, + 'trade' => 1, + 'trading' => 1, + 'training' => 1, + 'travel' => 1, + 'travelers' => 1, + 'travelersinsurance' => 1, + 'trust' => 1, + 'trv' => 1, + 'tube' => 1, + 'tui' => 1, + 'tunes' => 1, + 'tushu' => 1, + 'tvs' => 1, + 'ubank' => 1, + 'ubs' => 1, + 'unicom' => 1, + 'university' => 1, + 'uno' => 1, + 'uol' => 1, + 'ups' => 1, + 'vacations' => 1, + 'vana' => 1, + 'vanguard' => 1, + 'vegas' => 1, + 'ventures' => 1, + 'verisign' => 1, + 'versicherung' => 1, + 'vet' => 1, + 'viajes' => 1, + 'video' => 1, + 'vig' => 1, + 'viking' => 1, + 'villas' => 1, + 'vin' => 1, + 'vip' => 1, + 'virgin' => 1, + 'visa' => 1, + 'vision' => 1, + 'viva' => 1, + 'vivo' => 1, + 'vlaanderen' => 1, + 'vodka' => 1, + 'volvo' => 1, + 'vote' => 1, + 'voting' => 1, + 'voto' => 1, + 'voyage' => 1, + 'wales' => 1, + 'walmart' => 1, + 'walter' => 1, + 'wang' => 1, + 'wanggou' => 1, + 'watch' => 1, + 'watches' => 1, + 'weather' => 1, + 
'weatherchannel' => 1, + 'webcam' => 1, + 'weber' => 1, + 'website' => 1, + 'wed' => 1, + 'wedding' => 1, + 'weibo' => 1, + 'weir' => 1, + 'whoswho' => 1, + 'wien' => 1, + 'wiki' => 1, + 'williamhill' => 1, + 'win' => 1, + 'windows' => 1, + 'wine' => 1, + 'winners' => 1, + 'wme' => 1, + 'wolterskluwer' => 1, + 'woodside' => 1, + 'work' => 1, + 'works' => 1, + 'world' => 1, + 'wow' => 1, + 'wtc' => 1, + 'wtf' => 1, + 'xbox' => 1, + 'xerox' => 1, + 'xihuan' => 1, + 'xin' => 1, + 'कॉम' => 1, + 'セール' => 1, + '佛山' => 1, + '慈善' => 1, + '集团' => 1, + '在线' => 1, + '点看' => 1, + 'คอม' => 1, + '八卦' => 1, + 'موقع' => 1, + '公益' => 1, + '公司' => 1, + '香格里拉' => 1, + '网站' => 1, + '移动' => 1, + '我爱你' => 1, + 'москва' => 1, + 'католик' => 1, + 'онлайн' => 1, + 'сайт' => 1, + '联通' => 1, + 'קום' => 1, + '时尚' => 1, + '微博' => 1, + '淡马锡' => 1, + 'ファッション' => 1, + 'орг' => 1, + 'नेट' => 1, + 'ストア' => 1, + 'アマゾン' => 1, + '삼성' => 1, + '商标' => 1, + '商店' => 1, + '商城' => 1, + 'дети' => 1, + 'ポイント' => 1, + '新闻' => 1, + '家電' => 1, + 'كوم' => 1, + '中文网' => 1, + '中信' => 1, + '娱乐' => 1, + '谷歌' => 1, + '電訊盈科' => 1, + '购物' => 1, + 'クラウド' => 1, + '通販' => 1, + '网店' => 1, + 'संगठन' => 1, + '餐厅' => 1, + '网络' => 1, + 'ком' => 1, + '亚马逊' => 1, + '食品' => 1, + '飞利浦' => 1, + '手机' => 1, + 'ارامكو' => 1, + 'العليان' => 1, + 'بازار' => 1, + 'ابوظبي' => 1, + 'كاثوليك' => 1, + 'همراه' => 1, + '닷컴' => 1, + '政府' => 1, + 'شبكة' => 1, + 'بيتك' => 1, + 'عرب' => 1, + '机构' => 1, + '组织机构' => 1, + '健康' => 1, + '招聘' => 1, + 'рус' => 1, + '大拿' => 1, + 'みんな' => 1, + 'グーグル' => 1, + '世界' => 1, + '書籍' => 1, + '网址' => 1, + '닷넷' => 1, + 'コム' => 1, + '天主教' => 1, + '游戏' => 1, + 'vermögensberater' => 1, + 'vermögensberatung' => 1, + '企业' => 1, + '信息' => 1, + '嘉里大酒店' => 1, + '嘉里' => 1, + '广东' => 1, + '政务' => 1, + 'xyz' => 1, + 'yachts' => 1, + 'yahoo' => 1, + 'yamaxun' => 1, + 'yandex' => 1, + 'yodobashi' => 1, + 'yoga' => 1, + 'yokohama' => 1, + 'you' => 1, + 'youtube' => 1, + 'yun' => 1, + 'zappos' => 1, + 'zara' => 1, + 'zero' => 1, + 
'zip' => 1, + 'zone' => 1, + 'zuerich' => 1, +); diff --git a/packages/playground/data-liberation/tests/URLParserWHATWGComplianceTests.php b/packages/playground/data-liberation/tests/URLParserWHATWGComplianceTests.php new file mode 100644 index 00000000000..62ec6161d88 --- /dev/null +++ b/packages/playground/data-liberation/tests/URLParserWHATWGComplianceTests.php @@ -0,0 +1,133 @@ +assertTrue(URL::canParse($example['input'], $example['base'])); + + $parsed = new URL($example['input'], $example['base']); + $this->assertEquals($example['protocol'], $parsed->protocol); + $this->assertEquals($example['username'], $parsed->username); + $this->assertEquals($example['password'], $parsed->password); + $this->assertEquals($example['host'], $parsed->host); + $this->assertEquals($example['port'], $parsed->port); + $this->assertEquals($example['hostname'], $parsed->hostname); + $this->assertEquals($example['pathname'], $parsed->pathname); + $this->assertEquals($example['search'], $parsed->search); + $this->assertEquals($example['hash'], $parsed->hash); + if (isset($example['origin'])) { + $this->assertEquals($example['origin'], $parsed->origin); + } + } + + /** + * Data provider. + * + * @return array[]. + */ + static public function data_valid_urls() { + static $test_examples = load_test_examples(); + + $valid_urls = array(); + foreach($test_examples as $example) { + if(is_string($example)) { + continue; + } + if (!isset($example['failure'])) { + $valid_urls[] = [$example]; + } + } + return $valid_urls; + + // @TODO: Figure out why this test case fails. I had to remove it from urltestdata.json because + // the UTF-16 sequences made the json_code return null. 
The error was: + // + // > Single unpaired UTF-16 surrogate in unicode escape + // + // $valid_urls[] = array( + // array( + // "input" => "http://example.com/".encodeUtf16Hex('d800')."𐟾".encodeUtf16Hex('dfff')."﷐﷏﷯ﷰ￾￿?".encodeUtf16Hex('d800')."𐟾".encodeUtf16Hex('dfff')."﷐﷏﷯ﷰ￾￿", + // "base" => null, + // "href" => "http://example.com/%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF?%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + // "origin" => "http://example.com", + // "protocol" => "http:", + // "username" => "", + // "password" => "", + // "host" => "example.com", + // "hostname" => "example.com", + // "port" => "", + // "pathname" => "/%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + // "search" => "?%EF%BF%BD%F0%90%9F%BE%EF%BF%BD%EF%B7%90%EF%B7%8F%EF%B7%AF%EF%B7%B0%EF%BF%BE%EF%BF%BF", + // "hash" => "" + // ), + // ), + } + + /** + * Test url_parser function. + * + * @dataProvider data_invalid_urls + * @return void + */ + public function test_parse_invalid_url($example) + { + $this->assertFalse( + URL::canParse($example['input'], $example['base']), + "Should have rejected invalid URL {$example['input']} with base {$example['base']}" + ); + } + + /** + * Data provider. + * + * @return array[]. 
+ */ + static public function data_invalid_urls() { + static $test_examples = load_test_examples(); + + $urls = array(); + foreach($test_examples as $example) { + if(is_string($example)) { + continue; + } + if (array_key_exists('failure', $example) && $example['failure'] === true) { + $urls[] = [$example]; + } + } + return $urls; + } + +} diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php new file mode 100644 index 00000000000..3817d63b4d5 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPBlockMarkupProcessorTests.php @@ -0,0 +1,353 @@ +next_token(); + $this->assertEquals( '#block-comment', $p->get_token_type(), 'Failed to identify the block comment' ); + $this->assertEquals( $block_name, $p->get_block_name(), 'Failed to identify the block name' ); + $this->assertEquals( $block_attributes, $p->get_block_attributes(), 'Failed to identify the block attributes' ); + } + + static public function provider_test_finds_block_openers() { + return [ + 'Opener without attributes' => [ '', 'wp:paragraph', null ], + 'Opener without the trailing whitespace' => [ '', 'wp:paragraph', null ], + 'Opener with a lot of trailing whitespace' => [ '', 'wp:paragraph', null ], + 'Opener with attributes' => [ + '', + 'wp:paragraph', + [ 'class' => 'wp-bold' ], + ], + 'Opener with empty attributes' => [ '', 'wp:paragraph', [] ], + 'Opener with lots of whitespace around attributes' => [ + '', + 'wp:paragraph', + [ 'class' => 'wp-bold' ], + ], + 'Opener with object and array attributes' => [ + '', + 'wp:code', + [ 'meta' => [ 'language' => 'php', 'highlightedLines' => [ 14, 22 ] ], 'class' => 'dark' ], + ], + ]; + } + + /** + * + * @dataProvider provider_test_finds_block_closers + */ + public function test_find_block_closers( $markup, $block_name ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertEquals( '#block-comment', 
$p->get_token_type(), 'Failed to identify the block comment' ); + $this->assertEquals( $block_name, $p->get_block_name(), 'Failed to identify the block name' ); + $this->assertTrue( $p->is_block_closer(), 'Failed to identify the block closer status' ); + } + + static public function provider_test_finds_block_closers() { + return [ + 'Closer without attributes' => [ '', 'wp:paragraph' ], + 'Closer without the trailing whitespace' => [ '', 'wp:paragraph' ], + 'Closer with a lot of trailing whitespace' => [ '', 'wp:paragraph' ], + ]; + } + + /** + * + * @dataProvider provider_test_treat_invalid_block_openers_as_comments + */ + public function test_treat_invalid_block_openers_as_comments( $markup ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertEquals( '#comment', $p->get_token_type(), 'Failed to identify the comment' ); + $this->assertFalse( $p->get_block_name(), 'The block name wasn\'t false' ); + $this->assertFalse( $p->get_block_attributes(), 'The block attributes weren\'t false' ); + } + + static public function provider_test_treat_invalid_block_openers_as_comments() { + return [ + 'Opener with a line break before whitespace' => [ "", ], + 'Block name including !' 
=> [ '', ], + 'Block name including a whitespace' => [ '', ], + 'No namespace in the block name' => [ '', ], + 'Non-object attributes' => [ '', ], + 'Invalid JSON as attributes – Double }} ' => [ '', ], + ]; + } + + /** + * + * @dataProvider provider_test_treat_invalid_block_closers_as_comments + */ + public function test_treat_invalid_block_closers_as_comments( $markup ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertEquals( '#comment', $p->get_token_type(), 'Failed to identify the comment' ); + $this->assertFalse( $p->get_block_name(), 'The block name wasn\'t false' ); + $this->assertFalse( $p->get_block_attributes(), 'The block attributes weren\'t false' ); + } + + static public function provider_test_treat_invalid_block_closers_as_comments() { + return [ + 'Closer with a line break before whitespace' => [ "", ], + 'Closer with attributes' => [ '', ], + 'Closer with solidus at the end (before whitespace)' => [ '', ], + 'Closer with solidus at the end (after whitespace)' => [ '', ], + ]; + } + + /** + * @dataProvider provider_test_set_modifiable_text + */ + public function test_set_modifiable_text( $markup, $new_text, $new_markup, $which_token = 1 ) { + $p = new WP_Block_Markup_Processor( $markup ); + for ( $i = 0; $i < $which_token; $i ++ ) { + $p->next_token(); + } + $this->assertTrue( $p->set_modifiable_text( $new_text ), 'Failed to set the modifiable text.' ); + $this->assertEquals( $new_markup, $p->get_updated_html(), 'Failed to set the modifiable text.' ); + } + + static public function provider_test_set_modifiable_text() { + return [ + 'Changing the text of a block comment' => [ + '', + ' wp:paragraph {"class": "wp-bold"} ', + '', + ], + 'Changing the text of a text node' => [ + 'Hello, there', + 'I am a new text', + 'I am a new text', + ], + 'Changing the text of a text node in a tag' => [ + '

Hello, there

', + 'I am a new text', + '

I am a new text

', + 2, + ], + 'Escapes the text in a text node' => [ + '

Hello, there

', + 'The
tag is my favorite one', + '

The <div> tag is my favorite one

', + 2, + ], + ]; + } + + /** + * @dataProvider provider_test_set_modifiable_text_invalid_nodes + */ + public function test_set_modifiable_text_refuses_to_process_unsupported_nodes( $markup ) { + $p = new WP_Block_Markup_Processor( $markup ); + $p->next_token(); + $this->assertFalse( $p->set_modifiable_text( 'New text' ), 'Set the modifiable text on an unsupported node.' ); + } + + + static public function provider_test_set_modifiable_text_invalid_nodes() { + return [ + 'Tag' => ['
'], + 'DOCTYPE' => [''], + 'Funky comment' => [''], + ]; + } + + public function test_set_modifiable_text_can_be_called_twice() { + $p = new WP_Block_Markup_Processor( '

Hey there

' ); + $p->next_token(); + $p->next_token(); + $this->assertTrue( $p->set_modifiable_text( 'This is the new text, it is much longer' ), 'Failed to set the modifiable text.' ); + $this->assertEquals( + '

This is the new text, it is much longer

', + $p->get_updated_html(), + 'Failed to set the modifiable text.' + ); + + $this->assertTrue( $p->set_modifiable_text( 'Back to short text :)' ), 'Failed to set the modifiable text.' ); + $this->assertEquals( + '

Back to short text :)

', + $p->get_updated_html(), + 'Failed to set the modifiable text.' + ); + } + + public function test_next_block_attribute_returns_false_after_the_last_attribute() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertFalse( $p->next_block_attribute(), 'Returned true even though there was no next attribute' ); + } + + public function test_next_block_attribute_finds_the_first_attribute() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + + $this->assertEquals( 'class', $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'wp-bold', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_next_block_attribute_finds_the_second_attribute() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + + $this->assertEquals( 'id', $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'New York City', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_next_block_attribute_finds_nested_attributes() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find 
the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the third block attribute' ); + + $this->assertEquals( 'lowres', $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'small.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + + $this->assertEquals( 'hires', $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'large.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_next_block_attribute_loops_over_lists() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the third block attribute' ); + + $this->assertEquals( 0, $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'small.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + + $this->assertEquals( 1, $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'large.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_next_block_attribute_finds_top_level_attributes_after_nesting() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block 
opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the third block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the fourth block attribute' ); + + $this->assertEquals( 'class', $p->get_block_attribute_key(), 'Failed to find the block attribute name' ); + $this->assertEquals( 'wp-bold', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_set_block_attribute_value_updates_a_simple_attribute() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + + $p->set_block_attribute_value( 'wp-italics' ); + $this->assertEquals( '', $p->get_updated_html(), + 'Failed to update the block attribute value' ); + } + + public function test_set_block_attribute_value_updates_affects_get_block_attribute_value() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + + $p->set_block_attribute_value( 'wp-italics' ); + $this->assertEquals( 'wp-italics', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + } + + public function test_set_block_attribute_value_updates_a_nested_attribute() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( 
$p->next_block_attribute(), 'Failed to find the third block attribute' ); + + $p->set_block_attribute_value( 'medium.png' ); + $this->assertEquals( 'medium.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + $this->assertEquals( '', $p->get_updated_html(), + 'Failed to update the block attribute value' ); + } + + public function test_set_block_attribute_value_updates_a_list_value() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the third block attribute' ); + + $p->set_block_attribute_value( 'medium.png' ); + $this->assertEquals( 'medium.png', $p->get_block_attribute_value(), 'Failed to find the block attribute value' ); + $this->assertEquals( '', $p->get_updated_html(), + 'Failed to update the block attribute value' ); + } + + public function test_set_block_attribute_can_be_called_multiple_times() { + $p = new WP_Block_Markup_Processor( + '' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the second block attribute' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the third block attribute' ); + + $p->set_block_attribute_value( 'medium.png' ); + $p->set_block_attribute_value( 'oh-completely-different-image.png' ); + $this->assertEquals( 'oh-completely-different-image.png', $p->get_block_attribute_value(), + 'Failed to find the block attribute value' ); + $this->assertEquals( + '', + $p->get_updated_html(), + 'Failed to update the block attribute value' + ); + } + + public function 
test_set_block_attribute_value_flushes_updates_on_next_token() { + $p = new WP_Block_Markup_Processor( + 'Hello, there' + ); + $this->assertTrue( $p->next_token(), 'Failed to find the block opener' ); + $this->assertTrue( $p->next_block_attribute(), 'Failed to find the first block attribute' ); + $this->assertTrue( $p->set_block_attribute_value( 'wp-italics' ), 'Failed to update the block attribute value' ); + $this->assertTrue( $p->next_token(), 'Failed to find the text node' ); + $this->assertEquals( + 'Hello, there', + $p->get_updated_html(), + 'Failed to update the block attribute value' + ); + } +} diff --git a/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php b/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php new file mode 100644 index 00000000000..a93331fe702 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPBlockMarkupUrlProcessorTests.php @@ -0,0 +1,191 @@ +assertFalse( $p->next_url_in_current_token() ); + } + + /** + * + * @dataProvider provider_test_finds_next_url + */ + public function test_next_url_finds_the_url($url, $markup, $base_url='https://wordpress.org') + { + $p = new WP_Block_Markup_Url_Processor($markup, $base_url); + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertEquals($url, $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.'); + } + + static public function provider_test_finds_next_url() + { + return [ + 'In the
tag' => ['https://wordpress.org', ''], + 'In the second block attribute, when it contains just the URL' => [ + 'https://mysite.com/wp-content/image.png', + '' + ], + 'In the first block attribute, when it contains just the URL' => [ + 'https://mysite.com/wp-content/image.png', + '' + ], + 'In a block attribute, in a nested object, when it contains just the URL' => [ + 'https://mysite.com/wp-content/image.png', + '' + ], + 'In a block attribute, in an array, when it contains just the URL' => [ + 'https://mysite.com/wp-content/image.png', + '' + ], + 'In a text node, when it contains a well-formed absolute URL' => [ + 'https://wordpress.org', + 'Have you seen https://wordpress.org? ' + ], + 'In a text node after a tag' => [ + 'wordpress.org', + '

Have you seen wordpress.org' + ], + 'In a text node, when it contains a protocol-relative absolute URL' => [ + '//wordpress.org', + 'Have you seen //wordpress.org? ' + ], + 'In a text node, when it contains a domain-only absolute URL' => [ + 'wordpress.org', + 'Have you seen wordpress.org? ' + ], + 'In a text node, when it contains a domain-only absolute URL with path' => [ + 'wordpress.org/plugins', + 'Have you seen wordpress.org/plugins? ' + ], + 'Matches an empty string in as a valid relative URL when given a base URL' => [ + '', + '', + 'https://wordpress.org' + ], + 'Skips over an empty string in when not given a base URL' => [ + 'https://developer.w.org', + '', + null + ], + ]; + } + + public function test_next_url_returns_false_once_theres_no_more_urls( ) { + $markup = ''; + $p = new WP_Block_Markup_Url_Processor( $markup ); + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertFalse( $p->next_url(), 'Found more URLs than expected.' ); + } + + public function test_next_url_finds_urls_in_multiple_attributes( ) { + $markup = ''; + $p = new WP_Block_Markup_Url_Processor( $markup ); + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertEquals( 'https://first-url.org', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); + + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertEquals( 'https://mysite.com/wp-content/image.png', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); + } + + public function test_next_url_finds_urls_in_multiple_tags( ) { + $markup = ''; + $p = new WP_Block_Markup_Url_Processor( $markup ); + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' 
); + $this->assertEquals( 'https://first-url.org', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); + + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertEquals( 'https://mysite.com/wp-content/image.png', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); + + $this->assertTrue( $p->next_url(), 'Failed to find the URL in the markup.' ); + $this->assertEquals( 'https://third-url.org', $p->get_raw_url(), 'Found a URL in the markup, but it wasn\'t the expected one.' ); + } + + /** + * + * @dataProvider provider_test_set_url_examples + */ + public function test_set_url($markup, $new_url, $new_markup) + { + $p = new WP_Block_Markup_Url_Processor($markup); + $this->assertTrue($p->next_url(), 'Failed to find the URL in the markup.'); + $this->assertTrue($p->set_raw_url($new_url), 'Failed to set the URL in the markup.'); + $this->assertEquals($new_markup, $p->get_updated_html(), 'Failed to set the URL in the markup.'); + } + + static public function provider_test_set_url_examples() + { + return [ + 'In the href attribute of an tag' => [ + '', + 'https://w.org', + '' + ], + 'In the "src" block attribute' => [ + '', + 'https://w.org', + '' + ], + 'In a text node' => [ + 'Have you seen https://wordpress.org yet?', + 'https://w.org', + 'Have you seen https://w.org yet?' + ], + ]; + } + + public function test_set_url_complex_test_case() + { + $p = new WP_Block_Markup_Url_Processor( + << + + + + +

During the Write of Passage, I stubbornly tried to beat my writer’s block by writing until 3am multiple times. The burnout returned. I dropped everything and went to Greece for a week.

+ + + +

+Have you seen my blog, adamadam.blog? I told a story there of how I got my Bachelor's degree, +check it out: https://adamadam.blog/2021/09/16/how-i-got-bachelors-in-six-months/ +

+ +HTML, + 'https://adamadam.blog' + ); + + // Replace every url with 'https://site-export.internal' + while($p->next_url()) { + $p->set_raw_url('https://site-export.internal'); + } + + $this->assertEquals( + << + + + + +

During the Write of Passage, I stubbornly tried to beat my writer’s block by writing until 3am multiple times. The burnout returned. I dropped everything and went to Greece for a week.

+ + + +

+Have you seen my blog, site-export.internal? I told a story there of how I got my Bachelor's degree, +check it out: site-export.internal +

+ +HTML, + $p->get_updated_html(), + 'Failed to update all the URLs in the markup.' + ); + } + +} diff --git a/packages/playground/data-liberation/tests/WPMigrationURLInTextProcessorTests.php b/packages/playground/data-liberation/tests/WPMigrationURLInTextProcessorTests.php new file mode 100644 index 00000000000..fbfb5d10f00 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPMigrationURLInTextProcessorTests.php @@ -0,0 +1,163 @@ +assertTrue( $p->next_url(), 'Failed to find the URL in the text.' ); + } + $this->assertEquals( $url, $p->get_raw_url(), 'Found a URL in the text, but it wasn\'t the expected one.' ); + $this->assertEquals( $parsed_href, $p->get_parsed_url()->href, 'Found a URL in the text, but it wasn\'t the expected one.' ); + } + + static public function provider_test_finds_next_url_when_base_url_is_used() { + return [ + 'Absolute URL (first)' => [ + 'https://wordpress.org', + 'https://wordpress.org/', + 'Have you seen https://wordpress.org?', + ], + 'Absolute URL (second) ' => [ + 'https://w.org', + 'https://w.org/', + 'Have you seen https://wordpress.org or https://w.org?', + 2, + ], + 'Absolute URL with an all-ASCII path' => [ + 'https://w.org/plugins', + 'https://w.org/plugins', + 'Visit the WordPress plugins directory https://w.org/plugins', + ], + 'Absolute URL with a UTF-8 query' => [ + 'https://w.org/plugins?łąka=1', + 'https://w.org/plugins?%C5%82%C4%85ka=1', + 'Visit the WordPress plugins directory https://w.org/plugins?łąka=1', + ], + 'Absolute URL with a urlencoded all-ASCII query' => [ + 'https://w.org/plugins?%C5%82%C4%85ka=1', + 'https://w.org/plugins?%C5%82%C4%85ka=1', + 'Visit the WordPress plugins directory https://w.org/plugins?%C5%82%C4%85ka=1', + ], + 'Protocol-relative URL' => [ '//w.org/', 'https://w.org/', 'Visit the WordPress org at //w.org/ ' ], + 'UTF-8 characters in the path' => [ + 'https://w.org/łąka', + 'https://w.org/%C5%82%C4%85ka', + 'Visit the WordPress plugins directory https://w.org/łąka', + ], + 
'Domain-only' => [ 'www.example.com', 'https://www.example.com/', 'Visit www.example.com' ], + 'Domain + path' => [ + 'www.example.com/path', + 'https://www.example.com/path', + 'Visit www.example.com/path', + ], + 'UTF-8 domain' => [ 'łąka.pl', 'https://xn--ka-ola7v.pl/', 'Więcej na łąka.pl' ], + 'Skips the closing parenthesis after the path' => [ + 'https://w.org/plugins', + 'https://w.org/plugins', + 'Visit the WordPress plugins directory (https://w.org/plugins)', + ], + 'Retains the parenthesis within the path' => [ + 'https://w.org/plug(in)s', + 'https://w.org/plug(in)s', + 'Visit the WordPress plugins directory (https://w.org/plug(in)s', + ], + 'Skips index.html' => [ + 'www.example.com', + 'https://www.example.com/', + 'Visit index.html www.example.com', + ], + ]; + } + + public function test_set_url_returns_true_on_success() { + $p = new WP_Migration_URL_In_Text_Processor( 'Have you seen https://wordpress.org?' ); + $p->next_url(); + $this->assertTrue( $p->set_raw_url( 'https://w.org' ), 'Failed to set the URL in the text.' ); + } + + public function test_set_url_returns_false_on_failure() { + $p = new WP_Migration_URL_In_Text_Processor( 'Have you seen WordPress?' ); + $p->next_url(); + $this->assertFalse( $p->set_raw_url( 'https://w.org' ), 'set_url returned true when no URL was matched.' ); + } + + /** + * + * @dataProvider provider_test_set_url_data + */ + public function test_set_url_replaces_the_url( $text, $new_url, $expected_text ) { + $p = new WP_Migration_URL_In_Text_Processor( $text ); + $p->next_url(); + $p->set_raw_url( $new_url ); + $this->assertEquals( + $new_url, + $p->get_raw_url(), + 'Failed to set the URL in the text.' + ); + $this->assertEquals( + $expected_text, + $p->get_updated_text(), + 'Failed to set the URL in the text.' 
+ ); + } + + static public function provider_test_set_url_data() { + return [ + 'Replace with HTTPS URL' => [ + 'Have you seen https://wordpress.org (or wp.org)?', + 'https://wikipedia.org', + 'Have you seen https://wikipedia.org (or wp.org)?', + ], + 'Replace with a protocol-relative URL' => [ + 'Have you seen https://wordpress.org (or wp.org)?', + '//wikipedia.org', + 'Have you seen //wikipedia.org (or wp.org)?', + ], + 'Replace with a schema-less URL' => [ + 'Have you seen https://wordpress.org (or wp.org)?', + 'wikipedia.org', + 'Have you seen wikipedia.org (or wp.org)?', + ], + ]; + } + + public function test_set_url_can_be_called_twice() { + $p = new WP_Migration_URL_In_Text_Processor( 'Have you seen https://wordpress.org (or w.org)?' ); + $p->next_url(); + $p->set_raw_url( 'https://developer.wordpress.org' ); + $p->get_updated_text(); + $p->set_raw_url( 'https://wikipedia.org' ); + $this->assertEquals( + 'https://wikipedia.org', + $p->get_raw_url(), + 'Failed to set the URL in the text.' + ); + $this->assertEquals( + 'Have you seen https://wikipedia.org (or w.org)?', + $p->get_updated_text(), + 'Failed to set the URL in the text.' + ); + } + + public function test_set_url_can_be_called_twice_before_moving_on() { + $p = new WP_Migration_URL_In_Text_Processor( 'Have you seen https://wordpress.org (or w.org)?', 'https://w.org' ); + $p->next_url(); + $p->set_raw_url( 'https://wikipedia.org' ); + $p->get_updated_text(); + $p->set_raw_url( 'https://developer.wordpress.org' ); + $p->next_url(); + $p->set_raw_url( 'https://meetups.wordpress.org' ); + $this->assertEquals( + 'Have you seen https://developer.wordpress.org (or meetups.wordpress.org)?', + $p->get_updated_text(), + 'Failed to set the URL in the text.' 
+ ); + } +} diff --git a/packages/playground/data-liberation/tests/WPRewriteUrlsTests.php b/packages/playground/data-liberation/tests/WPRewriteUrlsTests.php new file mode 100644 index 00000000000..b4fb9f83e9a --- /dev/null +++ b/packages/playground/data-liberation/tests/WPRewriteUrlsTests.php @@ -0,0 +1,149 @@ + $original_markup, + 'current-site-url' => $current_site_url, + 'new-site-url' => $new_site_url, + ) ); + $this->assertEquals( $expected_markup, $result, 'Failed to migrate the URLs in the block markup' ); + } + + static public function provider_test_wp_rewrite_urls() { + return [ + 'Domain in a block attribute' => [ + '', + '', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + 'Domain in a block attribute expressed with JSON UTF-8 escape sequences' => [ + '', + '', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + 'Domain in an HTML attribute semantically expressing a URL' => [ + <<Contact us + + + + HTML, + <<Contact us + + + + HTML, + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + 'Path in an HTML attribute semantically expressing a URL – source path has no trailing slash' => [ + 'Nuclear fusion', + 'Nuclear fusion', + 'https://legacy-blog.com/~jappleseed/1997.10.1', + 'https://modern-webstore.org/blog/' + ], + 'Path in an HTML attribute semantically expressing a URL – source path has a trailing slash' => [ + 'Nuclear fusion', + 'Nuclear fusion', + 'https://legacy-blog.com/~jappleseed/1997.10.1/', + 'https://modern-webstore.org/blog/' + ], + 'Domain in an HTML attribute – encoded using HTML entities' => [ + 'Contact us', + 'Contact us', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + // 'Domain in an HTML attribute semantically expressing text' => [ + // 'Johnny Appleseed, the founder of https://legacy-blog.com/', + // 'Johnny Appleseed, the founder of https://modern-webstore.org/', + // 'https://legacy-blog.com', + // 'https://modern-webstore.org' + // ], + // @TODO Is that 
actually a thing? Can we distinguish between "special tokens" + // (such as CSS classes) and "text" (such as the alt attribute)? + // 'Ignores domains in HTML attributes semantically expressing data different that text or URLs' => [ + // '

CSS quirks – anything can be a class

', + // '

CSS quirks – anything can be a class

', + // 'https://legacy-blog.com', + // 'https://modern-webstore.org' + // ], + "Domain in a regular text snippet – preceeded by a protocol" => [ + 'Join the team at https://legacy-blog.com/we-are-hiring', + 'Join the team at https://modern-webstore.org/we-are-hiring', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + "Domain in a regular text snippet – not preceeded by a protocol" => [ + 'Join the team at legacy-blog.com/we-are-hiring', + 'Join the team at modern-webstore.org/we-are-hiring', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + 'Domain in a regular text snippet – preceeded by a protocol – encoded using HTML entities' => [ + 'Join the team at https://legacy-blog.com/pages/contact-us', + 'Join the team at https://modern-webstore.org/pages/contact-us', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + 'Domain in a regular text snippet – no protocol – encoded using HTML entities' => [ + 'Join the team at legacy-blog.com/pages/contact-us', + 'Join the team at modern-webstore.org/pages/contact-us', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + "Ignores lookalikes: retains legacy-blog.comdot in a regular text snippet when migrating legacy-blog.com" => [ + 'Join the team at legacy-blog.comdot/we-are-hiring', + 'Join the team at legacy-blog.comdot/we-are-hiring', + 'https://legacy-blog.com', + 'https://modern-webstore.org' + ], + ]; + } + + + /** + * + * @dataProvider provider_diverse_domains + */ + public function test_wp_rewrite_url_migrates_domains_in_a_href( + $domain_for_markup, + $domain_for_lookup = null + ) { + $current_site_url = $domain_for_lookup ? 
"https://$domain_for_lookup" : "https://$domain_for_markup"; + $new_site_url = "https://wordpress.org"; + $result = wp_rewrite_urls( array( + 'block_markup' => "", + 'current-site-url' => $current_site_url, + 'new-site-url' => $new_site_url, + ) ); + $this->assertEquals( "", $result, 'Failed to migrate the domain found in ' ); + } + + static public function provider_diverse_domains() { + return [ + "Regular ascii" => [ 'rocket-science.com' ], + "Prefixed with an emoji" => [ '🚀-science.com' ], + "Emoji-only – lookup by emoji notation" => [ '🚀.com', '🚀.com' ], + "Emoji-only – lookup by punycode notation" => [ '🚀.com', 'xn---science-7f85g.com' ], + "Punycode-encoded – lookup by punycode notation" => [ 'xn---science-7f85g.com', 'xn---science-7f85g.com' ], + "Punycode-encoded – lookup by emoji notation" => [ 'xn---science-7f85g.com', '🚀.com' ], + ]; + } +} diff --git a/packages/playground/data-liberation/tests/whatwg_url_test_data.json b/packages/playground/data-liberation/tests/whatwg_url_test_data.json new file mode 100644 index 00000000000..fb49b8d1ddf --- /dev/null +++ b/packages/playground/data-liberation/tests/whatwg_url_test_data.json @@ -0,0 +1,9801 @@ +[ + "See ../README.md for a description of the format.", + { + "input": "http://example\t.\norg", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://user:pass@foo:21/bar;par?b#c", + "base": "http://example.org/foo/bar", + "href": "http://user:pass@foo:21/bar;par?b#c", + "origin": "http://foo:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "foo:21", + "hostname": "foo", + "port": "21", + "pathname": "/bar;par", + "search": "?b", + "hash": "#c" + }, + { + "input": "https://test:@test", + "base": null, + "href": 
"https://test@test/", + "origin": "https://test", + "protocol": "https:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://:@test", + "base": null, + "href": "https://test/", + "origin": "https://test", + "protocol": "https:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://test:@test/x", + "base": null, + "href": "non-special://test@test/x", + "origin": "null", + "protocol": "non-special:", + "username": "test", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "non-special://:@test/x", + "base": null, + "href": "non-special://test/x", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "http:foo.com", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "\t :foo.com \n", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com", + "search": "", + "hash": "" + }, + { + "input": " foo.com ", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/foo.com", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + 
"hostname": "example.org", + "port": "", + "pathname": "/foo/foo.com", + "search": "", + "hash": "" + }, + { + "input": "a:\t foo.com", + "base": "http://example.org/foo/bar", + "href": "a: foo.com", + "origin": "null", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": " foo.com", + "search": "", + "hash": "" + }, + { + "input": "http://f:21/ b ? d # e ", + "base": "http://example.org/foo/bar", + "href": "http://f:21/%20b%20?%20d%20#%20e", + "origin": "http://f:21", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:21", + "hostname": "f", + "port": "21", + "pathname": "/%20b%20", + "search": "?%20d%20", + "hash": "#%20e" + }, + { + "input": "lolscheme:x x#x x", + "base": null, + "href": "lolscheme:x x#x%20x", + "protocol": "lolscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x x", + "search": "", + "hash": "#x%20x" + }, + { + "input": "http://f:/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:0/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000/c", + "base": "http://example.org/foo/bar", + "href": "http://f:0/c", + "origin": "http://f:0", + "protocol": "http:", + "username": "", + "password": "", + "host": "f:0", + "hostname": "f", + "port": "0", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:00000000000000000000080/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": 
"http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:b/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: /c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:\n/c", + "base": "http://example.org/foo/bar", + "href": "http://f/c", + "origin": "http://f", + "protocol": "http:", + "username": "", + "password": "", + "host": "f", + "hostname": "f", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "http://f:fifty-two/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "non-special://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://f: 21 / b ? d # e ", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": " \t", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": ":foo.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + 
"hash": "" + }, + { + "input": ":foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:foo.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:foo.com/", + "search": "", + "hash": "" + }, + { + "input": ":", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": ":a", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:a", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:a", + "search": "", + "hash": "" + }, + { + "input": ":/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:/", + "search": "", + "hash": "" + }, + { + "input": ":#", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:", + "search": "", + "hash": "" + }, + { + "input": "#", + "base": 
"http://example.org/foo/bar", + "href": "http://example.org/foo/bar#", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "#/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#/" + }, + { + "input": "#\\", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#\\", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#\\" + }, + { + "input": "#;?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#;?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#;?" 
+ }, + { + "input": "?", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": ":23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:23", + "search": "", + "hash": "" + }, + { + "input": "/:23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/:23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/:23", + "search": "", + "hash": "" + }, + { + "input": "\\x", + "base": "http://example.org/foo/bar", + "href": "http://example.org/x", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + { + "input": "\\\\x\\hello", + "base": "http://example.org/foo/bar", + "href": "http://x/hello", + "origin": "http://x", + "protocol": "http:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/hello", + "search": "", + "hash": "" + }, + { + "input": "::", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/::", + "origin": 
"http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::", + "search": "", + "hash": "" + }, + { + "input": "::23", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/::23", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/::23", + "search": "", + "hash": "" + }, + { + "input": "foo://", + "base": "http://example.org/foo/bar", + "href": "foo://", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@c:29/d", + "base": "http://example.org/foo/bar", + "href": "http://a:b@c:29/d", + "origin": "http://c:29", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c:29", + "hostname": "c", + "port": "29", + "pathname": "/d", + "search": "", + "hash": "" + }, + { + "input": "http::@c:29", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/:@c:29", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/:@c:29", + "search": "", + "hash": "" + }, + { + "input": "http://&a:foo(b]c@d:2/", + "base": "http://example.org/foo/bar", + "href": "http://&a:foo(b%5Dc@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "&a", + "password": "foo(b%5Dc", + "host": "d:2", + "hostname": "d", + "port": "2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://::@c@d:2", + "base": "http://example.org/foo/bar", + "href": "http://:%3A%40c@d:2/", + "origin": "http://d:2", + "protocol": "http:", + "username": "", + "password": "%3A%40c", + "host": "d:2", + "hostname": "d", + "port": 
"2", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com:b@d/", + "base": "http://example.org/foo/bar", + "href": "http://foo.com:b@d/", + "origin": "http://d", + "protocol": "http:", + "username": "foo.com", + "password": "b", + "host": "d", + "hostname": "d", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo.com/\\@", + "base": "http://example.org/foo/bar", + "href": "http://foo.com//@", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "//@", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://foo.com/", + "origin": "http://foo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.com", + "hostname": "foo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\a\\b:c\\d@foo.com\\", + "base": "http://example.org/foo/bar", + "href": "http://a/b:c/d@foo.com/", + "origin": "http://a", + "protocol": "http:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/b:c/d@foo.com/", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@c\\", + "base": null, + "href": "http://a:b@c/", + "origin": "http://c", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "c", + "hostname": "c", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://a@b\\c", + "base": null, + "href": "ws://a@b/c", + "origin": "ws://b", + "protocol": "ws:", + "username": "a", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/c", + "search": "", + "hash": "" + }, + { + "input": "foo:/", + "base": "http://example.org/foo/bar", + "href": "foo:/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + 
"hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "foo:/bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo:/bar.com/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo://///////", + "base": "http://example.org/foo/bar", + "href": "foo://///////", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////", + "search": "", + "hash": "" + }, + { + "input": "foo://///////bar.com/", + "base": "http://example.org/foo/bar", + "href": "foo://///////bar.com/", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///////bar.com/", + "search": "", + "hash": "" + }, + { + "input": "foo:////://///", + "base": "http://example.org/foo/bar", + "href": "foo:////://///", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//://///", + "search": "", + "hash": "" + }, + { + "input": "c:/foo", + "base": "http://example.org/foo/bar", + "href": "c:/foo", + "origin": "null", + "protocol": "c:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "//foo/bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/bar", + "search": "", + "hash": "" + }, + { + "input": "http://foo/path;a??e#f#g", + "base": "http://example.org/foo/bar", + "href": "http://foo/path;a??e#f#g", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + 
"host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/path;a", + "search": "??e", + "hash": "#f#g" + }, + { + "input": "http://foo/abcd?efgh?ijkl", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd?efgh?ijkl", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "?efgh?ijkl", + "hash": "" + }, + { + "input": "http://foo/abcd#foo?bar", + "base": "http://example.org/foo/bar", + "href": "http://foo/abcd#foo?bar", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/abcd", + "search": "", + "hash": "#foo?bar" + }, + { + "input": "[61:24:74]:98", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:24:74]:98", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:24:74]:98", + "search": "", + "hash": "" + }, + { + "input": "http:[61:27]/:foo", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/[61:27]/:foo", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/[61:27]/:foo", + "search": "", + "hash": "" + }, + { + "input": "http://[1::2]:3:4", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://2001::1]:80", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://[2001::1]", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": 
"http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[::127.0.0.1]", + "base": "http://example.org/foo/bar", + "href": "http://[::7f00:1]/", + "origin": "http://[::7f00:1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::7f00:1]", + "hostname": "[::7f00:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[::127.0.0.1.]", + "base": "http://example.org/foo/bar", + "failure": true + }, + { + "input": "http://[0:0:0:0:0:0:13.1.68.3]", + "base": "http://example.org/foo/bar", + "href": "http://[::d01:4403]/", + "origin": "http://[::d01:4403]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[::d01:4403]", + "hostname": "[::d01:4403]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[2001::1]:80", + "base": "http://example.org/foo/bar", + "href": "http://[2001::1]/", + "origin": "http://[2001::1]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[2001::1]", + "hostname": "[2001::1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": 
"http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": "http://example.org/foo/bar", + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file://example:1/", + "base": null, + "failure": true + }, + { + "input": "file://example:test/", + "base": null, + "failure": true + }, + { + "input": "file://example%/", + "base": null, + "failure": true + }, + { + "input": "file://[example]/", + "base": null, + "failure": true + }, + { + "input": "ftps:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": 
"example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": "http://example.org/foo/bar", + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:/example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/example.com/", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + 
"search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": "http://example.org/foo/bar", + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": "http://example.org/foo/bar", + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": "http://example.org/foo/bar", + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": "http://example.org/foo/bar", + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": "http://example.org/foo/bar", + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": "http://example.org/foo/bar", + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": "http://example.org/foo/bar", + "href": 
"data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": "http://example.org/foo/bar", + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": "http://example.org/foo/bar", + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "/a/b/c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/b/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/b/c", + "search": "", + "hash": "" + }, + { + "input": "/a/ /c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%20/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%20/c", + "search": "", + "hash": "" + }, + { + "input": "/a%2fc", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a%2fc", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a%2fc", + "search": "", + "hash": "" + }, + { + "input": "/a/%2f/c", + "base": "http://example.org/foo/bar", + "href": "http://example.org/a/%2f/c", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + 
"host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/a/%2f/c", + "search": "", + "hash": "" + }, + { + "input": "#β", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar#%CE%B2", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "#%CE%B2" + }, + { + "input": "data:text/html,test#test", + "base": "http://example.org/foo/bar", + "href": "data:text/html,test#test", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "text/html,test", + "search": "", + "hash": "#test" + }, + { + "input": "tel:1234567890", + "base": "http://example.org/foo/bar", + "href": "tel:1234567890", + "origin": "null", + "protocol": "tel:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "1234567890", + "search": "", + "hash": "" + }, + "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", + { + "input": "ssh://example.com/foo/bar.git", + "base": "http://example.org/", + "href": "ssh://example.com/foo/bar.git", + "origin": "null", + "protocol": "ssh:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", + { + "input": "file:c:\\foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:/foo/bar.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": " File:c|////foo\\bar.html", + "base": "file:///tmp/mock/path", + "href": "file:///c:////foo/bar.html", + "protocol": "file:", + 
"username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:////foo/bar.html", + "search": "", + "hash": "" + }, + { + "input": "C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/C|\\foo\\bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//C|/foo/bar", + "base": "file:///tmp/mock/path", + "href": "file:///C:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "//server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "\\\\server\\file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "/\\server/file", + "base": "file:///tmp/mock/path", + "href": "file://server/file", + "protocol": "file:", + "username": "", + "password": "", + "host": "server", + "hostname": "server", + "port": "", + "pathname": "/file", + "search": "", + "hash": "" + }, + { + "input": "file:///foo/bar.txt", + "base": "file:///tmp/mock/path", + "href": "file:///foo/bar.txt", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
"/foo/bar.txt", + "search": "", + "hash": "" + }, + { + "input": "file:///home/me", + "base": "file:///tmp/mock/path", + "href": "file:///home/me", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/home/me", + "search": "", + "hash": "" + }, + { + "input": "//", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "file://test", + "base": "file:///tmp/mock/path", + "href": "file://test/", + "protocol": "file:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/", + "base": "file:///tmp/mock/path", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file://localhost/test", + "base": "file:///tmp/mock/path", + "href": "file:///test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", 
+ "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + { + "input": "file:test", + "base": "file:///tmp/mock/path", + "href": "file:///tmp/mock/test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/tmp/mock/test", + "search": "", + "hash": "" + }, + { + "input": "file:///w|m", + "base": null, + "href": "file:///w|m", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/w|m", + "search": "", + "hash": "" + }, + { + "input": "file:///w||m", + "base": null, + "href": "file:///w||m", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/w||m", + "search": "", + "hash": "" + }, + { + "input": "file:///w|/m", + "base": null, + "href": "file:///w:/m", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/w:/m", + "search": "", + "hash": "" + }, + { + "input": "file:C|/m/", + "base": null, + "href": "file:///C:/m/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/m/", + "search": "", + "hash": "" + }, + { + "input": "file:C||/m/", + "base": null, + "href": "file:///C||/m/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C||/m/", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/path.js", + { + "input": "http://example.com/././foo", + "base": null, + "href": "http://example.com/foo", + "origin": "http://example.com", + 
"protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/./.foo", + "base": null, + "href": "http://example.com/.foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/.foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/.", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/./", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/..", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/..bar", + "base": null, + "href": "http://example.com/foo/..bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + 
"hostname": "example.com", + "port": "", + "pathname": "/foo/..bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton", + "base": null, + "href": "http://example.com/foo/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar/../ton/../../a", + "base": null, + "href": "http://example.com/a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../..", + "base": null, + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/../../../ton", + "base": null, + "href": "http://example.com/ton", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/ton", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e%2", + "base": null, + "href": "http://example.com/foo/%2e%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": 
"/foo/%2e%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar", + "base": null, + "href": "http://example.com/%2e.bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%2e.bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com////../..", + "base": null, + "href": "http://example.com//", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//../..", + "base": null, + "href": "http://example.com/foo/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo/bar//..", + "base": null, + "href": "http://example.com/foo/bar/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar/", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo", + "base": null, + "href": "http://example.com/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%20foo", + "base": null, + "href": "http://example.com/%20foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%20foo", + "search": "", + "hash": "" + }, + { + "input": 
"http://example.com/foo%", + "base": null, + "href": "http://example.com/foo%", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2", + "base": null, + "href": "http://example.com/foo%2", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2zbar", + "base": null, + "href": "http://example.com/foo%2zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%2©zbar", + "base": null, + "href": "http://example.com/foo%2%C3%82%C2%A9zbar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%2%C3%82%C2%A9zbar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo%41%7a", + "base": null, + "href": "http://example.com/foo%41%7a", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%41%7a", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\t‘%91", + "base": null, + "href": "http://example.com/foo%C2%91%91", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%C2%91%91", + "search": "", + "hash": "" + }, + { + "input": 
"http://example.com/foo%00%51", + "base": null, + "href": "http://example.com/foo%00%51", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo%00%51", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/(%28:%3A%29)", + "base": null, + "href": "http://example.com/(%28:%3A%29)", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/(%28:%3A%29)", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%3A%3a%3C%3c", + "base": null, + "href": "http://example.com/%3A%3a%3C%3c", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%3A%3a%3C%3c", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/foo\tbar", + "base": null, + "href": "http://example.com/foobar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foobar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com\\\\foo\\\\bar", + "base": null, + "href": "http://example.com//foo//bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "//foo//bar", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "base": null, + "href": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%7Ffp3%3Eju%3Dduvgw%3Dd", + "search": "", + 
"hash": "" + }, + { + "input": "http://example.com/@asdf%40", + "base": null, + "href": "http://example.com/@asdf%40", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/@asdf%40", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/你好你好", + "base": null, + "href": "http://example.com/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‥/foo", + "base": null, + "href": "http://example.com/%E2%80%A5/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%A5/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com//foo", + "base": null, + "href": "http://example.com/%EF%BB%BF/foo", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%EF%BB%BF/foo", + "search": "", + "hash": "" + }, + { + "input": "http://example.com/‮/foo/‭/bar", + "base": null, + "href": "http://example.com/%E2%80%AE/foo/%E2%80%AD/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/%E2%80%AE/foo/%E2%80%AD/bar", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/relative.js", + { + "input": "http://www.google.com/foo?bar=baz#", + "base": null, + "href": "http://www.google.com/foo?bar=baz#", + "origin": "http://www.google.com", 
+ "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "" + }, + { + "input": "http://www.google.com/foo?bar=baz# »", + "base": null, + "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "?bar=baz", + "hash": "#%20%C2%BB" + }, + { + "input": "data:test# »", + "base": null, + "href": "data:test#%20%C2%BB", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "", + "hash": "#%20%C2%BB" + }, + { + "input": "http://www.google.com", + "base": null, + "href": "http://www.google.com/", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.0x00A80001", + "base": null, + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo%2Ehtml", + "base": null, + "href": "http://www/foo%2Ehtml", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + "pathname": "/foo%2Ehtml", + "search": "", + "hash": "" + }, + { + "input": "http://www/foo/%2E/html", + "base": null, + "href": "http://www/foo/html", + "origin": "http://www", + "protocol": "http:", + "username": "", + "password": "", + "host": "www", + "hostname": "www", + "port": "", + "pathname": 
"/foo/html", + "search": "", + "hash": "" + }, + { + "input": "http://user:pass@/", + "base": null, + "failure": true + }, + { + "input": "http://%25DOMAIN:foobar@foodomain.com/", + "base": null, + "href": "http://%25DOMAIN:foobar@foodomain.com/", + "origin": "http://foodomain.com", + "protocol": "http:", + "username": "%25DOMAIN", + "password": "foobar", + "host": "foodomain.com", + "hostname": "foodomain.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:\\\\www.google.com\\foo", + "base": null, + "href": "http://www.google.com/foo", + "origin": "http://www.google.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.google.com", + "hostname": "www.google.com", + "port": "", + "pathname": "/foo", + "search": "", + "hash": "" + }, + { + "input": "http://foo:80/", + "base": null, + "href": "http://foo/", + "origin": "http://foo", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo:81/", + "base": null, + "href": "http://foo:81/", + "origin": "http://foo:81", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "httpa://foo:80/", + "base": null, + "href": "httpa://foo:80/", + "origin": "null", + "protocol": "httpa:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://foo:-80/", + "base": null, + "failure": true + }, + { + "input": "https://foo:443/", + "base": null, + "href": "https://foo/", + "origin": "https://foo", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://foo:80/", + 
"base": null, + "href": "https://foo:80/", + "origin": "https://foo:80", + "protocol": "https:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:21/", + "base": null, + "href": "ftp://foo/", + "origin": "ftp://foo", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp://foo:80/", + "base": null, + "href": "ftp://foo:80/", + "origin": "ftp://foo:80", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:70/", + "base": null, + "href": "gopher://foo:70/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:70", + "hostname": "foo", + "port": "70", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "gopher://foo:443/", + "base": null, + "href": "gopher://foo:443/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:80/", + "base": null, + "href": "ws://foo/", + "origin": "ws://foo", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:81/", + "base": null, + "href": "ws://foo:81/", + "origin": "ws://foo:81", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:443/", + "base": null, + "href": "ws://foo:443/", + "origin": "ws://foo:443", + "protocol": "ws:", + "username": "", + 
"password": "", + "host": "foo:443", + "hostname": "foo", + "port": "443", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ws://foo:815/", + "base": null, + "href": "ws://foo:815/", + "origin": "ws://foo:815", + "protocol": "ws:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:80/", + "base": null, + "href": "wss://foo:80/", + "origin": "wss://foo:80", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:80", + "hostname": "foo", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:81/", + "base": null, + "href": "wss://foo:81/", + "origin": "wss://foo:81", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:81", + "hostname": "foo", + "port": "81", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:443/", + "base": null, + "href": "wss://foo/", + "origin": "wss://foo", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo", + "hostname": "foo", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss://foo:815/", + "base": null, + "href": "wss://foo:815/", + "origin": "wss://foo:815", + "protocol": "wss:", + "username": "", + "password": "", + "host": "foo:815", + "hostname": "foo", + "port": "815", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/example.com/", + "base": null, + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:/example.com/", + "base": null, + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": 
"example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:/example.com/", + "base": null, + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:/example.com/", + "base": null, + "href": "madeupscheme:/example.com/", + "origin": "null", + "protocol": "madeupscheme:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "file:/example.com/", + "base": null, + "href": "file:///example.com/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:/example.com/", + "base": null, + "href": "ftps:/example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:/example.com/", + "base": null, + "href": "gopher:/example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:/example.com/", + "base": null, + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:/example.com/", + "base": null, + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + 
"hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/example.com/", + "base": null, + "href": "data:/example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/example.com/", + "base": null, + "href": "javascript:/example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/example.com/", + "base": null, + "href": "mailto:/example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/example.com/", + "search": "", + "hash": "" + }, + { + "input": "http:example.com/", + "base": null, + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ftp:example.com/", + "base": null, + "href": "ftp://example.com/", + "origin": "ftp://example.com", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https:example.com/", + "base": null, + "href": "https://example.com/", + "origin": "https://example.com", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "madeupscheme:example.com/", + "base": null, + "href": "madeupscheme:example.com/", + "origin": "null", + "protocol": "madeupscheme:", + 
"username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ftps:example.com/", + "base": null, + "href": "ftps:example.com/", + "origin": "null", + "protocol": "ftps:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "gopher:example.com/", + "base": null, + "href": "gopher:example.com/", + "origin": "null", + "protocol": "gopher:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "ws:example.com/", + "base": null, + "href": "ws://example.com/", + "origin": "ws://example.com", + "protocol": "ws:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "wss:example.com/", + "base": null, + "href": "wss://example.com/", + "origin": "wss://example.com", + "protocol": "wss:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:example.com/", + "base": null, + "href": "data:example.com/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "javascript:example.com/", + "base": null, + "href": "javascript:example.com/", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + { + "input": "mailto:example.com/", + "base": null, + "href": "mailto:example.com/", + "origin": "null", + "protocol": "mailto:", + "username": "", + 
"password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "example.com/", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/segments-userinfo-vs-host.html", + { + "input": "http:@www.example.com", + "base": null, + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/@www.example.com", + "base": null, + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@www.example.com", + "base": null, + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:a:b@www.example.com", + "base": null, + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:b@www.example.com", + "base": null, + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:b@www.example.com", + "base": null, + "href": "http://a:b@www.example.com/", + "origin": "http://www.example.com", + 
"protocol": "http:", + "username": "a", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://@pple.com", + "base": null, + "href": "http://pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http::b@www.example.com", + "base": null, + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:b@www.example.com", + "base": null, + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://:b@www.example.com", + "base": null, + "href": "http://:b@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "b", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/:@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http://user@/www.example.com", + "base": null, + "failure": true + }, + { + "input": "http:@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http:/@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http://@/www.example.com", + "base": null, + "failure": true + }, + { + "input": 
"https:@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http:a:b@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http:/a:b@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http://a:b@/www.example.com", + "base": null, + "failure": true + }, + { + "input": "http::@/www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http:a:@www.example.com", + "base": null, + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:/a:@www.example.com", + "base": null, + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://a:@www.example.com", + "base": null, + "href": "http://a@www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "a", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://www.@pple.com", + "base": null, + "href": "http://www.@pple.com/", + "origin": "http://pple.com", + "protocol": "http:", + "username": "www.", + "password": "", + "host": "pple.com", + "hostname": "pple.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http:@:www.example.com", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http:/@:www.example.com", + 
"base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "http://@:www.example.com", + "base": null, + "failure": true + }, + { + "input": "http://:@www.example.com", + "base": null, + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Others", + { + "input": "/", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": ".", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "http://www.example.com/test", + "href": "http://www.example.com/", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + 
"username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "./test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../aaa/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/aaa/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/aaa/test.txt", + "search": "", + "hash": "" + }, + { + "input": "../../test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/test.txt", + "search": "", + "hash": "" + }, + { + "input": "中/test.txt", + "base": "http://www.example.com/test", + "href": "http://www.example.com/%E4%B8%AD/test.txt", + "origin": "http://www.example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example.com", + "hostname": "www.example.com", + "port": "", + "pathname": "/%E4%B8%AD/test.txt", + "search": "", + "hash": "" + }, + { + "input": "http://www.example2.com", + 
"base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "//www.example2.com", + "base": "http://www.example.com/test", + "href": "http://www.example2.com/", + "origin": "http://www.example2.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.example2.com", + "hostname": "www.example2.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:...", + "base": "http://www.example.com/test", + "href": "file:///...", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/...", + "search": "", + "hash": "" + }, + { + "input": "file:..", + "base": "http://www.example.com/test", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:a", + "base": "http://www.example.com/test", + "href": "file:///a", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/a", + "search": "", + "hash": "" + }, + { + "input": "file:.", + "base": null, + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:.", + "base": "http://www.example.com/test", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/host.html", + "Basic canonicalization, uppercase should be 
converted to lowercase", + { + "input": "http://ExAmPlE.CoM", + "base": "http://other.com/", + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://example example.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://Goo%20 goo%7C|.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[:]", + "base": "http://other.com/", + "failure": true + }, + "U+3000 is mapped to U+0020 (space) which is disallowed", + { + "input": "http://GOO  goo.com", + "base": "http://other.com/", + "failure": true + }, + "Other types of space (no-break, zero-width, zero-width-no-break) are name-prepped away to nothing. U+200B, U+2060, and U+FEFF, are ignored", + { + "input": "http://GOO​⁠goo.com", + "base": "http://other.com/", + "href": "http://googoo.com/", + "origin": "http://googoo.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "googoo.com", + "hostname": "googoo.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Leading and trailing C0 control or space", + { + "input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \r ", + "base": null, + "href": "http://example.com/", + "origin": "http://example.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. 
U+3002 is mapped to U+002E (dot)", + { + "input": "http://www.foo。bar.com", + "base": "http://other.com/", + "href": "http://www.foo.bar.com/", + "origin": "http://www.foo.bar.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "www.foo.bar.com", + "hostname": "www.foo.bar.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid unicode characters should fail... U+FDD0 is disallowed; %ef%b7%90 is U+FDD0", + { + "input": "http://﷐zyx.com", + "base": "http://other.com/", + "failure": true + }, + "This is the same as previous but escaped", + { + "input": "http://%ef%b7%90zyx.com", + "base": "http://other.com/", + "failure": true + }, + "U+FFFD", + { + "input": "https://�", + "base": null, + "failure": true + }, + { + "input": "https://%EF%BF%BD", + "base": null, + "failure": true + }, + { + "input": "https://x/�?�#�", + "base": null, + "href": "https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD", + "origin": "https://x", + "protocol": "https:", + "username": "", + "password": "", + "host": "x", + "hostname": "x", + "port": "", + "pathname": "/%EF%BF%BD", + "search": "?%EF%BF%BD", + "hash": "#%EF%BF%BD" + }, + "Domain is ASCII, but a label is invalid IDNA", + { + "input": "http://a.b.c.xn--pokxncvks", + "base": null, + "failure": true + }, + { + "input": "http://10.0.0.xn--pokxncvks", + "base": null, + "failure": true + }, + "IDNA labels should be matched case-insensitively", + { + "input": "http://a.b.c.XN--pokxncvks", + "base": null, + "failure": true + }, + { + "input": "http://a.b.c.Xn--pokxncvks", + "base": null, + "failure": true + }, + { + "input": "http://10.0.0.XN--pokxncvks", + "base": null, + "failure": true + }, + { + "input": "http://10.0.0.xN--pokxncvks", + "base": null, + "failure": true + }, + "Test name prepping, fullwidth input should be converted to ASCII and NOT IDN-ized. 
This is 'Go' in fullwidth UTF-8/UTF-16.", + { + "input": "http://Go.com", + "base": "http://other.com/", + "href": "http://go.com/", + "origin": "http://go.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "go.com", + "hostname": "go.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "URL spec forbids the following. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24257", + { + "input": "http://%41.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com", + "base": "http://other.com/", + "failure": true + }, + "...%00 in fullwidth should fail (also as escaped UTF-8 input)", + { + "input": "http://%00.com", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com", + "base": "http://other.com/", + "failure": true + }, + "Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN", + { + "input": "http://你好你好", + "base": "http://other.com/", + "href": "http://xn--6qqa088eba/", + "origin": "http://xn--6qqa088eba", + "protocol": "http:", + "username": "", + "password": "", + "host": "xn--6qqa088eba", + "hostname": "xn--6qqa088eba", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://faß.ExAmPlE/", + "base": null, + "href": "https://xn--fa-hia.example/", + "origin": "https://xn--fa-hia.example", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--fa-hia.example", + "hostname": "xn--fa-hia.example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://faß.ExAmPlE/", + "base": null, + "href": "sc://fa%C3%9F.ExAmPlE/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "fa%C3%9F.ExAmPlE", + "hostname": "fa%C3%9F.ExAmPlE", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid escaped characters should fail and the percents should be escaped. 
https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", + { + "input": "http://%zz%66%a.com", + "base": "http://other.com/", + "failure": true + }, + "If we get an invalid character that has been escaped.", + { + "input": "http://%25", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://hello%00", + "base": "http://other.com/", + "failure": true + }, + "Escaped numbers should be treated like IP addresses if they are.", + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://%30%78%63%30%2e%30%32%35%30.01%2e", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.0.257", + "base": "http://other.com/", + "failure": true + }, + "Invalid escaping in hosts causes failure", + { + "input": "http://%3g%78%63%30%2e%30%32%35%30%2E.01", + "base": "http://other.com/", + "failure": true + }, + "A space in a host causes failure", + { + "input": "http://192.168.0.1 hello", + "base": "http://other.com/", + "failure": true + }, + { + "input": "https://x x:12", + "base": null, + "failure": true + }, + "Fullwidth and escaped UTF-8 fullwidth should still be treated as IP", + { + "input": "http://0Xc0.0250.01", + "base": "http://other.com/", + "href": "http://192.168.0.1/", + "origin": "http://192.168.0.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.0.1", + "hostname": "192.168.0.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Domains with empty labels", + 
{ + "input": "http://./", + "base": null, + "href": "http://./", + "origin": "http://.", + "protocol": "http:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://../", + "base": null, + "href": "http://../", + "origin": "http://..", + "protocol": "http:", + "username": "", + "password": "", + "host": "..", + "hostname": "..", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Non-special domains with empty labels", + { + "input": "h://.", + "base": null, + "href": "h://.", + "origin": "null", + "protocol": "h:", + "username": "", + "password": "", + "host": ".", + "hostname": ".", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + "Broken IPv6", + { + "input": "http://[www.google.com]/", + "base": null, + "failure": true + }, + { + "input": "http://[google.com]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.4x]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::.1.2]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::.1]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::%31]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://%5B::1]", + "base": "http://other.com/", + "failure": true + }, + "Misc Unicode", + { + "input": "http://foo:💩@example.com/bar", + "base": "http://other.com/", + "href": "http://foo:%F0%9F%92%A9@example.com/bar", + "origin": "http://example.com", + "protocol": "http:", + "username": "foo", + "password": "%F0%9F%92%A9", + "host": "example.com", + "hostname": "example.com", + "port": 
"", + "pathname": "/bar", + "search": "", + "hash": "" + }, + "# resolving a fragment against any scheme succeeds", + { + "input": "#", + "base": "test:test", + "href": "test:test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "", + "hash": "" + }, + { + "input": "#x", + "base": "mailto:x@x.com", + "href": "mailto:x@x.com#x", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "x@x.com", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "data:,", + "href": "data:,#x", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": ",", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "about:blank", + "href": "about:blank#x", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blank", + "search": "", + "hash": "#x" + }, + { + "input": "#x:y", + "base": "about:blank", + "href": "about:blank#x:y", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blank", + "search": "", + "hash": "#x:y" + }, + { + "input": "#", + "base": "test:test?test", + "href": "test:test?test#", + "origin": "null", + "protocol": "test:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "test", + "search": "?test", + "hash": "" + }, + "# multiple @ in authority state", + { + "input": "https://@test@test@example:800/", + "base": "http://doesnotmatter/", + "href": "https://%40test%40test@example:800/", + "origin": "https://example:800", + "protocol": "https:", + "username": "%40test%40test", + "password": "", + "host": "example:800", + "hostname": "example", + "port": 
"800", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://@@@example", + "base": "http://doesnotmatter/", + "href": "https://%40%40@example/", + "origin": "https://example", + "protocol": "https:", + "username": "%40%40", + "password": "", + "host": "example", + "hostname": "example", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "non-az-09 characters", + { + "input": "http://`{}:`{}@h/`{}?`{}", + "base": "http://doesnotmatter/", + "href": "http://%60%7B%7D:%60%7B%7D@h/%60%7B%7D?`{}", + "origin": "http://h", + "protocol": "http:", + "username": "%60%7B%7D", + "password": "%60%7B%7D", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/%60%7B%7D", + "search": "?`{}", + "hash": "" + }, + "byte is ' and url is special", + { + "input": "http://host/?'", + "base": null, + "href": "http://host/?%27", + "origin": "http://host", + "protocol": "http:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?%27", + "hash": "" + }, + { + "input": "notspecial://host/?'", + "base": null, + "href": "notspecial://host/?'", + "origin": "null", + "protocol": "notspecial:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?'", + "hash": "" + }, + "# Credentials in base", + { + "input": "/some/path", + "base": "http://user@example.org/smth", + "href": "http://user@example.org/some/path", + "origin": "http://example.org", + "protocol": "http:", + "username": "user", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/smth", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", 
+ "port": "21", + "pathname": "/smth", + "search": "", + "hash": "" + }, + { + "input": "/some/path", + "base": "http://user:pass@example.org:21/smth", + "href": "http://user:pass@example.org:21/some/path", + "origin": "http://example.org:21", + "protocol": "http:", + "username": "user", + "password": "pass", + "host": "example.org:21", + "hostname": "example.org", + "port": "21", + "pathname": "/some/path", + "search": "", + "hash": "" + }, + "# a set of tests designed by zcorpan for relative URLs with unknown schemes", + { + "input": "i", + "base": "sc:sd", + "failure": true + }, + { + "input": "i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "i", + "base": "sc:/pa/pa", + "href": "sc:/pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "i", + "base": "sc:///pa/pa", + "href": "sc:///pa/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc:sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "../i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "../i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": 
"", + "hash": "" + }, + { + "input": "../i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "/i", + "base": "sc:/pa/pa", + "href": "sc:/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc://ho/pa", + "href": "sc://ho/i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "/i", + "base": "sc:///pa/pa", + "href": "sc:///i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/i", + "search": "", + "hash": "" + }, + { + "input": "?i", + "base": "sc:sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:sd/sd", + "failure": true + }, + { + "input": "?i", + "base": "sc:/pa/pa", + "href": "sc:/pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc://ho/pa", + "href": "sc://ho/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "?i", + "hash": "" + }, + { + "input": "?i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa?i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": 
"?i", + "hash": "" + }, + { + "input": "#i", + "base": "sc:sd", + "href": "sc:sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:sd/sd", + "href": "sc:sd/sd#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "sd/sd", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:/pa/pa", + "href": "sc:/pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc://ho/pa", + "href": "sc://ho/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "ho", + "hostname": "ho", + "port": "", + "pathname": "/pa", + "search": "", + "hash": "#i" + }, + { + "input": "#i", + "base": "sc:///pa/pa", + "href": "sc:///pa/pa#i", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pa/pa", + "search": "", + "hash": "#i" + }, + "# make sure that relative URL logic works on known typically non-relative schemes too", + { + "input": "about:/../", + "base": null, + "href": "about:/", + "origin": "null", + "protocol": "about:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "data:/../", + "base": null, + "href": "data:/", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "javascript:/../", + "base": null, + "href": "javascript:/", + "origin": "null", + "protocol": "javascript:", + "username": "", + 
"password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "mailto:/../", + "base": null, + "href": "mailto:/", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# unknown schemes and their hosts", + { + "input": "sc://ñ.test/", + "base": null, + "href": "sc://%C3%B1.test/", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1.test", + "hostname": "%C3%B1.test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://%/", + "base": null, + "href": "sc://%/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%", + "hostname": "%", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://@/", + "base": null, + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": null, + "failure": true + }, + { + "input": "sc://:/", + "base": null, + "failure": true + }, + { + "input": "sc://:12/", + "base": null, + "failure": true + }, + { + "input": "x", + "base": "sc://ñ", + "href": "sc://%C3%B1/x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "/x", + "search": "", + "hash": "" + }, + "# unknown schemes and backslashes", + { + "input": "sc:\\../", + "base": null, + "href": "sc:\\../", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\../", + "search": "", + "hash": "" + }, + "# unknown scheme with path looking like a password", + { + "input": "sc::a@example.net", + "base": null, + "href": "sc::a@example.net", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
":a@example.net", + "search": "", + "hash": "" + }, + "# unknown scheme with bogus percent-encoding", + { + "input": "wow:%NBD", + "base": null, + "href": "wow:%NBD", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%NBD", + "search": "", + "hash": "" + }, + { + "input": "wow:%1G", + "base": null, + "href": "wow:%1G", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%1G", + "search": "", + "hash": "" + }, + "# unknown scheme with non-URL characters", + { + "input": "wow:￿", + "base": null, + "href": "wow:%EF%BF%BF", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%EF%BF%BF", + "search": "", + "hash": "" + }, + "Forbidden host code points", + { + "input": "sc://a\u0000b/", + "base": null, + "failure": true + }, + { + "input": "sc://a b/", + "base": null, + "failure": true + }, + { + "input": "sc://ab", + "base": null, + "failure": true + }, + { + "input": "sc://a[b/", + "base": null, + "failure": true + }, + { + "input": "sc://a\\b/", + "base": null, + "failure": true + }, + { + "input": "sc://a]b/", + "base": null, + "failure": true + }, + { + "input": "sc://a^b", + "base": null, + "failure": true + }, + { + "input": "sc://a|b/", + "base": null, + "failure": true + }, + "Forbidden host codepoints: tabs and newlines are removed during preprocessing", + { + "input": "foo://ho\tst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "foo://ho\nst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + 
"username": "" + }, + { + "input": "foo://ho\rst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + "Forbidden domain code-points", + { + "input": "http://a\u0000b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0001b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0002b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0003b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0004b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0005b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0006b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0007b/", + "base": null, + "failure": true + }, + { + "input": "http://a\bb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u000bb/", + "base": null, + "failure": true + }, + { + "input": "http://a\fb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u000eb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u000fb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0010b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0011b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0012b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0013b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0014b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0015b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0016b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0017b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0018b/", + "base": null, + "failure": true + }, + { + "input": "http://a\u0019b/", + "base": null, + "failure": true + }, + { + 
"input": "http://a\u001ab/", + "base": null, + "failure": true + }, + { + "input": "http://a\u001bb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u001cb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u001db/", + "base": null, + "failure": true + }, + { + "input": "http://a\u001eb/", + "base": null, + "failure": true + }, + { + "input": "http://a\u001fb/", + "base": null, + "failure": true + }, + { + "input": "http://a b/", + "base": null, + "failure": true + }, + { + "input": "http://a%b/", + "base": null, + "failure": true + }, + { + "input": "http://ab", + "base": null, + "failure": true + }, + { + "input": "http://a[b/", + "base": null, + "failure": true + }, + { + "input": "http://a]b/", + "base": null, + "failure": true + }, + { + "input": "http://a^b", + "base": null, + "failure": true + }, + { + "input": "http://a|b/", + "base": null, + "failure": true + }, + { + "input": "http://ab/", + "base": null, + "failure": true + }, + "Forbidden domain codepoints: tabs and newlines are removed during preprocessing", + { + "input": "http://ho\tst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "http://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://ho\nst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "http://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://ho\rst/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "http://host/", + "password": "", + "pathname": "/", + "port": "", + "protocol": "http:", + "search": "", + "username": "" + }, + "Encoded forbidden domain codepoints in special URLs", + { + "input": "http://ho%00st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%01st/", + "base": null, + "failure": true + }, + 
{ + "input": "http://ho%02st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%03st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%04st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%05st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%06st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%07st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%08st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%09st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Ast/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Bst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Cst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Dst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Est/", + "base": null, + "failure": true + }, + { + "input": "http://ho%0Fst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%10st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%11st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%12st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%13st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%14st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%15st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%16st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%17st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%18st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%19st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%1Ast/", + "base": null, + "failure": true + }, + { + "input": "http://ho%1Bst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%1Cst/", + "base": null, + "failure": true + }, + { + "input": 
"http://ho%1Dst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%1Est/", + "base": null, + "failure": true + }, + { + "input": "http://ho%1Fst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%20st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%23st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%25st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%2Fst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%3Ast/", + "base": null, + "failure": true + }, + { + "input": "http://ho%3Cst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%3Est/", + "base": null, + "failure": true + }, + { + "input": "http://ho%3Fst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%40st/", + "base": null, + "failure": true + }, + { + "input": "http://ho%5Bst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%5Cst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%5Dst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%7Cst/", + "base": null, + "failure": true + }, + { + "input": "http://ho%7Fst/", + "base": null, + "failure": true + }, + "Allowed host/domain code points", + { + "input": "http://!\"$&'()*+,-.;=_`{}~/", + "base": null, + "href": "http://!\"$&'()*+,-.;=_`{}~/", + "origin": "http://!\"$&'()*+,-.;=_`{}~", + "protocol": "http:", + "username": "", + "password": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "sc://\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000b\f\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f!\"$%&'()*+,-.;=_`{}~/", + "base": null, + "href": "sc://%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "protocol": "sc:", 
+ "username": "", + "password": "", + "host": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", + "hostname": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# Hosts and percent-encoding", + { + "input": "ftp://example.com%80/", + "base": null, + "failure": true + }, + { + "input": "ftp://example.com%A0/", + "base": null, + "failure": true + }, + { + "input": "https://example.com%80/", + "base": null, + "failure": true + }, + { + "input": "https://example.com%A0/", + "base": null, + "failure": true + }, + { + "input": "ftp://%e2%98%83", + "base": null, + "href": "ftp://xn--n3h/", + "origin": "ftp://xn--n3h", + "protocol": "ftp:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "https://%e2%98%83", + "base": null, + "href": "https://xn--n3h/", + "origin": "https://xn--n3h", + "protocol": "https:", + "username": "", + "password": "", + "host": "xn--n3h", + "hostname": "xn--n3h", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# tests from jsdom/whatwg-url designed for code coverage", + { + "input": "http://127.0.0.1:10100/relative_import.html", + "base": null, + "href": "http://127.0.0.1:10100/relative_import.html", + "origin": "http://127.0.0.1:10100", + "protocol": "http:", + "username": "", + "password": "", + "host": "127.0.0.1:10100", + "hostname": "127.0.0.1", + "port": "10100", + "pathname": "/relative_import.html", + "search": "", + "hash": "" + }, + { + "input": "http://facebook.com/?foo=%7B%22abc%22", + "base": null, + "href": "http://facebook.com/?foo=%7B%22abc%22", + "origin": "http://facebook.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "facebook.com", + "hostname": "facebook.com", + "port": "", + 
"pathname": "/", + "search": "?foo=%7B%22abc%22", + "hash": "" + }, + { + "input": "https://localhost:3000/jqueryui@1.2.3", + "base": null, + "href": "https://localhost:3000/jqueryui@1.2.3", + "origin": "https://localhost:3000", + "protocol": "https:", + "username": "", + "password": "", + "host": "localhost:3000", + "hostname": "localhost", + "port": "3000", + "pathname": "/jqueryui@1.2.3", + "search": "", + "hash": "" + }, + "# tab/LF/CR", + { + "input": "h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg", + "base": null, + "href": "http://host:9000/path?query#frag", + "origin": "http://host:9000", + "protocol": "http:", + "username": "", + "password": "", + "host": "host:9000", + "hostname": "host", + "port": "9000", + "pathname": "/path", + "search": "?query", + "hash": "#frag" + }, + "# Stringification of URL.searchParams", + { + "input": "?a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar?a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "?a=b&c=d", + "searchParams": "a=b&c=d", + "hash": "" + }, + { + "input": "??a=b&c=d", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar??a=b&c=d", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "??a=b&c=d", + "searchParams": "%3Fa=b&c=d", + "hash": "" + }, + "# Scheme only", + { + "input": "http:", + "base": "http://example.org/foo/bar", + "href": "http://example.org/foo/bar", + "origin": "http://example.org", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/foo/bar", + "search": "", + "searchParams": "", + "hash": "" + }, + { + "input": 
"http:", + "base": "https://example.org/foo/bar", + "failure": true + }, + { + "input": "sc:", + "base": "https://example.org/foo/bar", + "href": "sc:", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "searchParams": "", + "hash": "" + }, + "# Percent encoding of fragments", + { + "input": "http://foo.bar/baz?qux#foo\bbar", + "base": null, + "href": "http://foo.bar/baz?qux#foo%08bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%08bar" + }, + { + "input": "http://foo.bar/baz?qux#foo\"bar", + "base": null, + "href": "http://foo.bar/baz?qux#foo%22bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%22bar" + }, + { + "input": "http://foo.bar/baz?qux#foobar", + "base": null, + "href": "http://foo.bar/baz?qux#foo%3Ebar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%3Ebar" + }, + { + "input": "http://foo.bar/baz?qux#foo`bar", + "base": null, + "href": "http://foo.bar/baz?qux#foo%60bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%60bar" + }, + "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", + { + "input": "http://1.2.3.4/", + "base": "http://other.com/", + "href": "http://1.2.3.4/", + "origin": 
"http://1.2.3.4", + "protocol": "http:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://1.2.3.4./", + "base": "http://other.com/", + "href": "http://1.2.3.4/", + "origin": "http://1.2.3.4", + "protocol": "http:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.", + "base": "http://other.com/", + "href": "http://192.168.1.1/", + "origin": "http://192.168.1.1", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.1.1", + "hostname": "192.168.1.1", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://192.168.257.com", + "base": "http://other.com/", + "href": "http://192.168.257.com/", + "origin": "http://192.168.257.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "192.168.257.com", + "hostname": "192.168.257.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256", + "base": "http://other.com/", + "href": "http://0.0.1.0/", + "origin": "http://0.0.1.0", + "protocol": "http:", + "username": "", + "password": "", + "host": "0.0.1.0", + "hostname": "0.0.1.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://256.com", + "base": "http://other.com/", + "href": "http://256.com/", + "origin": "http://256.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "256.com", + "hostname": "256.com", + "port": "", + "pathname": 
"/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.", + "base": "http://other.com/", + "href": "http://59.154.201.255/", + "origin": "http://59.154.201.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "59.154.201.255", + "hostname": "59.154.201.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://999999999.com", + "base": "http://other.com/", + "href": "http://999999999.com/", + "origin": "http://999999999.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "999999999.com", + "hostname": "999999999.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://10000000000", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://10000000000.com", + "base": "http://other.com/", + "href": "http://10000000000.com/", + "origin": "http://10000000000.com", + "protocol": "http:", + "username": "", + "password": "", + "host": "10000000000.com", + "hostname": "10000000000.com", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967295", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": "http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://4294967296", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0xffffffff", + "base": "http://other.com/", + "href": "http://255.255.255.255/", + "origin": 
"http://255.255.255.255", + "protocol": "http:", + "username": "", + "password": "", + "host": "255.255.255.255", + "hostname": "255.255.255.255", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0xffffffff1", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "https://0x.0x.0", + "base": null, + "href": "https://0.0.0.0/", + "origin": "https://0.0.0.0", + "protocol": "https:", + "username": "", + "password": "", + "host": "0.0.0.0", + "hostname": "0.0.0.0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", + { + "input": "https://0x100000000/test", + "base": null, + "failure": true + }, + { + "input": "https://256.0.0.1/test", + "base": null, + "failure": true + }, + "# file URLs containing percent-encoded Windows drive letters (shouldn't work)", + { + "input": "file:///C%3A/", + "base": null, + "href": "file:///C%3A/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%3A/", + "search": "", + "hash": "" + }, + { + "input": "file:///C%7C/", + "base": null, + "href": "file:///C%7C/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C%7C/", + "search": "", + "hash": "" + }, + { + "input": "file://%43%3A", + "base": null, + "failure": true + }, + { + "input": "file://%43%7C", + "base": null, + "failure": true + }, + { + "input": "file://%43|", + "base": null, + "failure": true + }, + { + "input": "file://C%7C", + "base": null, + "failure": true + }, + { + "input": "file://%43%7C/", + "base": null, + "failure": true + }, + { + "input": "https://%43%7C/", + "base": null, + "failure": true + }, + { + "input": "asdf://%43|/", + "base": null, + "failure": true + }, + { + "input": 
"asdf://%43%7C/", + "base": null, + "href": "asdf://%43%7C/", + "origin": "null", + "protocol": "asdf:", + "username": "", + "password": "", + "host": "%43%7C", + "hostname": "%43%7C", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)", + { + "input": "pix/submit.gif", + "base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html", + "href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///C:/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# More file URL tests by zcorpan and annevk", + { + "input": "/", + "base": "file:///C:/a/b", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "file://h/C:/a/b", + "href": "file://h/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/", + "base": "file://h/a/b", + "href": "file://h/", + "protocol": "file:", + "username": "", + "password": "", + "host": "h", + "hostname": "h", + "port": "", + "pathname": "/", + 
"search": "", + "hash": "" + }, + { + "input": "//d:", + "base": "file:///C:/a/b", + "href": "file:///d:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:", + "search": "", + "hash": "" + }, + { + "input": "//d:/..", + "base": "file:///C:/a/b", + "href": "file:///d:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/d:/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///ab:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "..", + "base": "file:///1:/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "file:", + "base": "file:///test?test#test", + "href": "file:///test?test", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "" + }, + { + "input": "?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "file:?x", + "base": "file:///test?test#test", + "href": "file:///test?x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?x", + "hash": "" + }, + { + "input": "#x", + 
"base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + { + "input": "file:#x", + "base": "file:///test?test#test", + "href": "file:///test?test#x", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?test", + "hash": "#x" + }, + "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": null, + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": null, + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": null, + "href": "file:////?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": null, + "href": "file:////#guppy", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": null, + "href": "file://spider///", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": null, + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": 
"file:///localhost//cat", + "base": null, + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://\\/localhost//cat", + "base": null, + "href": "file:////localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": null, + "href": "file://///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file://///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///mouse", + "search": "", + "hash": "" + }, + { + "input": "\\//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:////pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:////pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//pig", + "search": "", + "hash": "" + }, + { + "input": "/..//localhost//pig", + "base": "file://lion/", + "href": "file://lion//localhost//pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "lion", + "hostname": "lion", + "port": "", + "pathname": 
"//localhost//pig", + "search": "", + "hash": "" + }, + { + "input": "file://", + "base": "file://ape/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# File URLs with non-empty hosts", + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, + "# Windows drive letter handling with the 'file:' base URL", + { + "input": "C|", + "base": "file://host/dir/file", + "href": "file://host/C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|", + "base": "file://host/D:/dir1/dir2/file", + "href": "file://host/C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|#", + "base": "file://host/dir/file", + "href": "file://host/C:#", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|?", + "base": "file://host/dir/file", + "href": "file://host/C:?", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, + { + "input": "C|/", + "base": "file://host/dir/file", + "href": "file://host/C:/", + 
"protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C|\n/", + "base": "file://host/dir/file", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C|\\", + "base": "file://host/dir/file", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "C", + "base": "file://host/dir/file", + "href": "file://host/dir/C", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C", + "search": "", + "hash": "" + }, + { + "input": "C|a", + "base": "file://host/dir/file", + "href": "file://host/dir/C|a", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/dir/C|a", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk in the file slash state", + { + "input": "/c:/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c|/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "file:\\c:\\foo\\bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
"/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c:/foo/bar", + "base": "file://host/path", + "href": "file://host/c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + "# Do not drop the host in the presence of a drive letter", + { + "input": "file://example.net/C:/", + "base": null, + "href": "file://example.net/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "example.net", + "hostname": "example.net", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://1.2.3.4/C:/", + "base": null, + "href": "file://1.2.3.4/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "1.2.3.4", + "hostname": "1.2.3.4", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://[1::8]/C:/", + "base": null, + "href": "file://[1::8]/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "[1::8]", + "hostname": "[1::8]", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Copy the host from the base URL in the following cases", + { + "input": "C|/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "/C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": 
"file:/C:/", + "base": "file://host/", + "href": "file://host/C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Copy the empty host from the input in the following cases", + { + "input": "//C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file:///C:/", + "base": "file://host/", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk (no host)", + { + "input": "file:/C|/", + "base": null, + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C|/", + "base": null, + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# file URLs without base URL by Rimas Misevičius", + { + "input": "file:", + "base": null, + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": 
"/", + "search": "", + "hash": "" + }, + { + "input": "file:?q=v", + "base": null, + "href": "file:///?q=v", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?q=v", + "hash": "" + }, + { + "input": "file:#frag", + "base": null, + "href": "file:///#frag", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#frag" + }, + "# file: drive letter cases from https://crbug.com/1078698", + { + "input": "file:///Y:", + "base": null, + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "file:///Y:/", + "base": null, + "href": "file:///Y:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y", + "base": null, + "href": "file:///Y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y", + "search": "", + "hash": "" + }, + { + "input": "file:///./Y:", + "base": null, + "href": "file:///Y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/Y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\Y:", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + "# file: drive letter cases from https://crbug.com/1078698 but lowercased", + { + "input": "file:///y:", + "base": null, + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "file:///y:/", + "base": null, + "href": "file:///y:/", + "protocol": "file:", 
+ "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:/", + "search": "", + "hash": "" + }, + { + "input": "file:///./y", + "base": null, + "href": "file:///y", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y", + "search": "", + "hash": "" + }, + { + "input": "file:///./y:", + "base": null, + "href": "file:///y:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/y:", + "search": "", + "hash": "" + }, + { + "input": "\\\\\\.\\y:", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + "# Additional file URL tests for (https://github.com/whatwg/url/issues/405)", + { + "input": "file://localhost//a//../..//foo", + "base": null, + "href": "file://///foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "///foo", + "search": "", + "hash": "" + }, + { + "input": "file://localhost////foo", + "base": null, + "href": "file://////foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////foo", + "search": "", + "hash": "" + }, + { + "input": "file:////foo", + "base": null, + "href": "file:////foo", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//foo", + "search": "", + "hash": "" + }, + { + "input": "file:///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + { + "input": "file:////one/two", + "base": "file:///", + "href": "file:////one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//one/two", + 
"search": "", + "hash": "" + }, + { + "input": "//one/two", + "base": "file:///", + "href": "file://one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "one", + "hostname": "one", + "port": "", + "pathname": "/two", + "search": "", + "hash": "" + }, + { + "input": "///one/two", + "base": "file:///", + "href": "file:///one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/one/two", + "search": "", + "hash": "" + }, + { + "input": "////one/two", + "base": "file:///", + "href": "file:////one/two", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//one/two", + "search": "", + "hash": "" + }, + { + "input": "file:///.//", + "base": "file:////", + "href": "file:////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + "File URL tests for https://github.com/whatwg/url/issues/549", + { + "input": "file:.//p", + "base": null, + "href": "file:////p", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + { + "input": "file:/.//p", + "base": null, + "href": "file:////p", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + "# IPv6 tests", + { + "input": "http://[1:0::]", + "base": "http://example.net/", + "href": "http://[1::]/", + "origin": "http://[1::]", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1::]", + "hostname": "[1::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[0:1:2:3:4:5:6:7:8]", + "base": "http://example.net/", + "failure": true + }, + { + "input": "https://[0::0::0]", + "base": null, + "failure": 
true + }, + { + "input": "https://[0:.0]", + "base": null, + "failure": true + }, + { + "input": "https://[0:0:]", + "base": null, + "failure": true + }, + { + "input": "https://[0:1:2:3:4:5:6:7.0.0.0.1]", + "base": null, + "failure": true + }, + { + "input": "https://[0:1.00.0.0.0]", + "base": null, + "failure": true + }, + { + "input": "https://[0:1.290.0.0.0]", + "base": null, + "failure": true + }, + { + "input": "https://[0:1.23.23]", + "base": null, + "failure": true + }, + "# Empty host", + { + "input": "http://?", + "base": null, + "failure": true + }, + { + "input": "http://#", + "base": null, + "failure": true + }, + "Port overflow (2^32 + 81)", + { + "input": "http://f:4294967377/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^64 + 81)", + { + "input": "http://f:18446744073709551697/c", + "base": "http://example.org/", + "failure": true + }, + "Port overflow (2^128 + 81)", + { + "input": "http://f:340282366920938463463374607431768211537/c", + "base": "http://example.org/", + "failure": true + }, + "# Non-special-URL path tests", + { + "input": "sc://ñ", + "base": null, + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": null, + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": null, + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": 
"", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": null, + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": null, + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "///", + "base": "sc://x/", + "href": "sc:///", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, + { + "input": "tftp://foobar.com/someconfig;mode=netascii", + "base": null, + "href": "tftp://foobar.com/someconfig;mode=netascii", + "origin": "null", + "protocol": "tftp:", + "username": "", + "password": "", + "host": "foobar.com", + "hostname": "foobar.com", + "port": "", + "pathname": "/someconfig;mode=netascii", + "search": "", + "hash": "" + }, + { + "input": "telnet://user:pass@foobar.com:23/", + "base": null, + "href": "telnet://user:pass@foobar.com:23/", + "origin": "null", + "protocol": "telnet:", + 
"username": "user", + "password": "pass", + "host": "foobar.com:23", + "hostname": "foobar.com", + "port": "23", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "ut2004://10.10.10.10:7777/Index.ut2", + "base": null, + "href": "ut2004://10.10.10.10:7777/Index.ut2", + "origin": "null", + "protocol": "ut2004:", + "username": "", + "password": "", + "host": "10.10.10.10:7777", + "hostname": "10.10.10.10", + "port": "7777", + "pathname": "/Index.ut2", + "search": "", + "hash": "" + }, + { + "input": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "base": null, + "href": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", + "origin": "null", + "protocol": "redis:", + "username": "foo", + "password": "bar", + "host": "somehost:6379", + "hostname": "somehost", + "port": "6379", + "pathname": "/0", + "search": "?baz=bam&qux=baz", + "hash": "" + }, + { + "input": "rsync://foo@host:911/sup", + "base": null, + "href": "rsync://foo@host:911/sup", + "origin": "null", + "protocol": "rsync:", + "username": "foo", + "password": "", + "host": "host:911", + "hostname": "host", + "port": "911", + "pathname": "/sup", + "search": "", + "hash": "" + }, + { + "input": "git://github.com/foo/bar.git", + "base": null, + "href": "git://github.com/foo/bar.git", + "origin": "null", + "protocol": "git:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, + { + "input": "irc://myserver.com:6999/channel?passwd", + "base": null, + "href": "irc://myserver.com:6999/channel?passwd", + "origin": "null", + "protocol": "irc:", + "username": "", + "password": "", + "host": "myserver.com:6999", + "hostname": "myserver.com", + "port": "6999", + "pathname": "/channel", + "search": "?passwd", + "hash": "" + }, + { + "input": "dns://fw.example.org:9999/foo.bar.org?type=TXT", + "base": null, + "href": "dns://fw.example.org:9999/foo.bar.org?type=TXT", + "origin": "null", + 
"protocol": "dns:", + "username": "", + "password": "", + "host": "fw.example.org:9999", + "hostname": "fw.example.org", + "port": "9999", + "pathname": "/foo.bar.org", + "search": "?type=TXT", + "hash": "" + }, + { + "input": "ldap://localhost:389/ou=People,o=JNDITutorial", + "base": null, + "href": "ldap://localhost:389/ou=People,o=JNDITutorial", + "origin": "null", + "protocol": "ldap:", + "username": "", + "password": "", + "host": "localhost:389", + "hostname": "localhost", + "port": "389", + "pathname": "/ou=People,o=JNDITutorial", + "search": "", + "hash": "" + }, + { + "input": "git+https://github.com/foo/bar", + "base": null, + "href": "git+https://github.com/foo/bar", + "origin": "null", + "protocol": "git+https:", + "username": "", + "password": "", + "host": "github.com", + "hostname": "github.com", + "port": "", + "pathname": "/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "urn:ietf:rfc:2648", + "base": null, + "href": "urn:ietf:rfc:2648", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "ietf:rfc:2648", + "search": "", + "hash": "" + }, + { + "input": "tag:joe@example.org,2001:foo/bar", + "base": null, + "href": "tag:joe@example.org,2001:foo/bar", + "origin": "null", + "protocol": "tag:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "joe@example.org,2001:foo/bar", + "search": "", + "hash": "" + }, + "Serialize /. 
in path", + { + "input": "non-spec:/.//", + "base": null, + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//", + "base": null, + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//", + "base": null, + "href": "non-spec:/.//", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/.//path", + "base": null, + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/..//path", + "base": null, + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "non-spec:/a/..//path", + "base": null, + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/.//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": 
"..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "a/..//path", + "base": "non-spec:/p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + { + "input": "", + "base": "non-spec:/..//p", + "href": "non-spec:/.//p", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//p", + "search": "", + "hash": "" + }, + { + "input": "path", + "base": "non-spec:/..//p", + "href": "non-spec:/.//path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//path", + "search": "", + "hash": "" + }, + "Do not serialize /. in path", + { + "input": "../path", + "base": "non-spec:/.//p", + "href": "non-spec:/path", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + "# percent encoded hosts in non-special-URLs", + { + "input": "non-special://%E2%80%A0/", + "base": null, + "href": "non-special://%E2%80%A0/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "%E2%80%A0", + "hostname": "%E2%80%A0", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://H%4fSt/path", + "base": null, + "href": "non-special://H%4fSt/path", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "H%4fSt", + "hostname": "H%4fSt", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + "# IPv6 in non-special-URLs", + { + "input": "non-special://[1:2:0:0:5:0:0:0]/", + "base": null, + "href": 
"non-special://[1:2:0:0:5::]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2:0:0:5::]", + "hostname": "[1:2:0:0:5::]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2:0:0:0:0:0:3]/", + "base": null, + "href": "non-special://[1:2::3]/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]", + "hostname": "[1:2::3]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[1:2::3]:80/", + "base": null, + "href": "non-special://[1:2::3]:80/", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "[1:2::3]:80", + "hostname": "[1:2::3]", + "port": "80", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "non-special://[:80/", + "base": null, + "failure": true + }, + { + "input": "blob:https://example.com:443/", + "base": null, + "href": "blob:https://example.com:443/", + "origin": "https://example.com", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "https://example.com:443/", + "search": "", + "hash": "" + }, + { + "input": "blob:http://example.org:88/", + "base": null, + "href": "blob:http://example.org:88/", + "origin": "http://example.org:88", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "http://example.org:88/", + "search": "", + "hash": "" + }, + { + "input": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "base": null, + "href": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "d3958f5c-0777-0845-9dcf-2cb28783acaf", + "search": "", + "hash": "" + }, + { + "input": "blob:", + "base": null, + "href": "blob:", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + 
"host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + "blob: in blob:", + { + "input": "blob:blob:", + "base": null, + "href": "blob:blob:", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blob:", + "search": "", + "hash": "" + }, + { + "input": "blob:blob:https://example.org/", + "base": null, + "href": "blob:blob:https://example.org/", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "blob:https://example.org/", + "search": "", + "hash": "" + }, + "Non-http(s): in blob:", + { + "input": "blob:about:blank", + "base": null, + "href": "blob:about:blank", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "about:blank", + "search": "", + "hash": "" + }, + { + "input": "blob:file://host/path", + "base": null, + "href": "blob:file://host/path", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "file://host/path", + "search": "", + "hash": "" + }, + { + "input": "blob:ftp://host/path", + "base": null, + "href": "blob:ftp://host/path", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "ftp://host/path", + "search": "", + "hash": "" + }, + { + "input": "blob:ws://example.org/", + "base": null, + "href": "blob:ws://example.org/", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "ws://example.org/", + "search": "", + "hash": "" + }, + { + "input": "blob:wss://example.org/", + "base": null, + "href": "blob:wss://example.org/", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + 
"hostname": "", + "port": "", + "pathname": "wss://example.org/", + "search": "", + "hash": "" + }, + "Percent-encoded http: in blob:", + { + "input": "blob:http%3a//example.org/", + "base": null, + "href": "blob:http%3a//example.org/", + "origin": "null", + "protocol": "blob:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "http%3a//example.org/", + "search": "", + "hash": "" + }, + "Invalid IPv4 radix digits", + { + "input": "http://0x7f.0.0.0x7g", + "base": null, + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://0X7F.0.0.0X7G", + "base": null, + "href": "http://0x7f.0.0.0x7g/", + "protocol": "http:", + "username": "", + "password": "", + "host": "0x7f.0.0.0x7g", + "hostname": "0x7f.0.0.0x7g", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Invalid IPv4 portion of IPv6 address", + { + "input": "http://[::127.0.0.0.1]", + "base": null, + "failure": true + }, + "Uncompressed IPv6 addresses with 0", + { + "input": "http://[0:1:0:1:0:1:0:1]", + "base": null, + "href": "http://[0:1:0:1:0:1:0:1]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[0:1:0:1:0:1:0:1]", + "hostname": "[0:1:0:1:0:1:0:1]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "http://[1:0:1:0:1:0:1:0]", + "base": null, + "href": "http://[1:0:1:0:1:0:1:0]/", + "protocol": "http:", + "username": "", + "password": "", + "host": "[1:0:1:0:1:0:1:0]", + "hostname": "[1:0:1:0:1:0:1:0]", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "Percent-encoded query and fragment", + { + "input": "http://example.org/test?\"", + "base": null, + "href": "http://example.org/test?%22", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": 
"example.org", + "port": "", + "pathname": "/test", + "search": "?%22", + "hash": "" + }, + { + "input": "http://example.org/test?#", + "base": null, + "href": "http://example.org/test?#", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "http://example.org/test?<", + "base": null, + "href": "http://example.org/test?%3C", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3C", + "hash": "" + }, + { + "input": "http://example.org/test?>", + "base": null, + "href": "http://example.org/test?%3E", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%3E", + "hash": "" + }, + { + "input": "http://example.org/test?⌣", + "base": null, + "href": "http://example.org/test?%E2%8C%A3", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%E2%8C%A3", + "hash": "" + }, + { + "input": "http://example.org/test?%23%23", + "base": null, + "href": "http://example.org/test?%23%23", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%23%23", + "hash": "" + }, + { + "input": "http://example.org/test?%GH", + "base": null, + "href": "http://example.org/test?%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?%GH", + "hash": "" + }, + { + "input": "http://example.org/test?a#%EF", + "base": null, + "href": "http://example.org/test?a#%EF", + "protocol": "http:", + "username": "", + 
"password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%EF" + }, + { + "input": "http://example.org/test?a#%GH", + "base": null, + "href": "http://example.org/test?a#%GH", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#%GH" + }, + "URLs that require a non-about:blank base. (Also serve as invalid base tests.)", + { + "input": "a", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "a/", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "a//", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + "Bases that don't fail to parse but fail to be bases", + { + "input": "test-a-colon.html", + "base": "a:", + "failure": true + }, + { + "input": "test-a-colon-b.html", + "base": "a:b", + "failure": true + }, + "Other base URL tests, that must succeed", + { + "input": "test-a-colon-slash.html", + "base": "a:/", + "href": "a:/test-a-colon-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash.html", + "base": "a://", + "href": "a:///test-a-colon-slash-slash.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-slash.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-b.html", + "base": "a:/b", + "href": "a:/test-a-colon-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test-a-colon-slash-b.html", + "search": "", + "hash": "" + }, + { + "input": "test-a-colon-slash-slash-b.html", + 
"base": "a://b", + "href": "a://b/test-a-colon-slash-slash-b.html", + "protocol": "a:", + "username": "", + "password": "", + "host": "b", + "hostname": "b", + "port": "", + "pathname": "/test-a-colon-slash-slash-b.html", + "search": "", + "hash": "" + }, + "Null code point in fragment", + { + "input": "http://example.org/test?a#b\u0000c", + "base": null, + "href": "http://example.org/test?a#b%00c", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + { + "input": "non-spec://example.org/test?a#b\u0000c", + "base": null, + "href": "non-spec://example.org/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + { + "input": "non-spec:/test?a#b\u0000c", + "base": null, + "href": "non-spec:/test?a#b%00c", + "protocol": "non-spec:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "?a", + "hash": "#b%00c" + }, + "First scheme char - not allowed: https://github.com/whatwg/url/issues/464", + { + "input": "10.0.0.7:8080/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/10.0.0.7:8080/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/10.0.0.7:8080/foo.html", + "search": "", + "hash": "" + }, + "Subsequent scheme chars - not allowed", + { + "input": "a!@$*=/foo.html", + "base": "file:///some/dir/bar.html", + "href": "file:///some/dir/a!@$*=/foo.html", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/some/dir/a!@$*=/foo.html", + "search": "", + "hash": "" + }, + "First and subsequent scheme chars - allowed", + { + "input": 
"a1234567890-+.:foo/bar", + "base": "http://example.com/dir/file", + "href": "a1234567890-+.:foo/bar", + "protocol": "a1234567890-+.:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "foo/bar", + "search": "", + "hash": "" + }, + "IDNA ignored code points in file URLs hosts", + { + "input": "file://a­b/p", + "base": null, + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + "hash": "" + }, + { + "input": "file://a%C2%ADb/p", + "base": null, + "href": "file://ab/p", + "protocol": "file:", + "username": "", + "password": "", + "host": "ab", + "hostname": "ab", + "port": "", + "pathname": "/p", + "search": "", + "hash": "" + }, + "IDNA hostnames which get mapped to 'localhost'", + { + "input": "file://loC𝐀𝐋𝐇𝐨𝐬𝐭/usr/bin", + "base": null, + "href": "file:///usr/bin", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/usr/bin", + "search": "", + "hash": "" + }, + "Empty host after the domain to ASCII", + { + "input": "file://­/p", + "base": null, + "failure": true + }, + { + "input": "file://%C2%AD/p", + "base": null, + "failure": true + }, + { + "input": "file://xn--/p", + "base": null, + "failure": true + }, + "https://bugzilla.mozilla.org/show_bug.cgi?id=1647058", + { + "input": "#link", + "base": "https://example.org/##link", + "href": "https://example.org/#link", + "protocol": "https:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "#link" + }, + "UTF-8 percent-encode of C0 control percent-encode set and supersets", + { + "input": "non-special:cannot-be-a-base-url-\u0000\u0001\u001f\u001e~€", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + 
"origin": "null", + "password": "", + "pathname": "cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "https://www.example.com/path{path.html?query'=query#fragment<fragment", + "base": null, + "hash": "#fragment%3C%7Ffragment", + "host": "www.example.com", + "hostname": "www.example.com", + "href": "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment", + "origin": "https://www.example.com", + "password": "", + "pathname": "/path%7B%7Fpath.html", + "port": "", + "protocol": "https:", + "search": "?query%27%7F=query", + "username": "" + }, + { + "input": "https://user:pass[@foo/bar", + "base": "http://example.org", + "hash": "", + "host": "foo", + "hostname": "foo", + "href": "https://user:pass%5B%7F@foo/bar", + "origin": "https://foo", + "password": "pass%5B%7F", + "pathname": "/bar", + "port": "", + "protocol": "https:", + "search": "", + "username": "user" + }, + "Tests for the distinct percent-encode sets", + { + "input": "foo:// !\"$%&'()*+,-.;<=>@[\\]^_`{|}~@host/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~" + }, + { + "input": "foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + 
"href": "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", + "origin": "null", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "joe" + }, + { + "input": "wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", + "origin": "wss://host", + "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "joe" + }, + { + "input": "foo://!\"$%&'()*+,-.;=_`{}~/", + "base": null, + "hash": "", + "host": "!\"$%&'()*+,-.;=_`{}~", + "hostname": "!\"$%&'()*+,-.;=_`{}~", + "href": "foo://!\"$%&'()*+,-.;=_`{}~/", + "origin": "null", + "password": "", + "pathname": "/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://!\"$&'()*+,-.;=_`{}~/", + "base": null, + "hash": "", + "host": "!\"$&'()*+,-.;=_`{}~", + "hostname": "!\"$&'()*+,-.;=_`{}~", + "href": "wss://!\"$&'()*+,-.;=_`{}~/", + "origin": "wss://!\"$&'()*+,-.;=_`{}~", + "password": "", + "pathname": "/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "origin": "null", + "password": "", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "origin": "wss://host", + "password": 
"", + "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + { + "input": "foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "foo:", + "search": "?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "wss://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": null, + "hash": "", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "wss:", + "search": "?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", + "username": "" + }, + { + "input": "foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": null, + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "null", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "foo:", + "search": "", + "username": "" + }, + { + "input": "wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "base": null, + "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "host": "host", + "hostname": "host", + "href": "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", + "origin": "wss://host", + "password": "", + "pathname": "/dir/", + "port": "", + "protocol": "wss:", + "search": "", + "username": "" + }, + "Ensure that input schemes are not ignored when resolving non-special URLs", + { + "input": "abc:rootless", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + 
"protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:rootless", + "base": "abc:path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:rootless", + "password": "", + "pathname": "rootless", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + { + "input": "abc:/rooted", + "base": "abc://host/path", + "hash": "", + "host": "", + "hostname": "", + "href": "abc:/rooted", + "password": "", + "pathname": "/rooted", + "port": "", + "protocol": "abc:", + "search": "", + "username": "" + }, + "Empty query and fragment with blank should throw an error", + { + "input": "#", + "base": null, + "failure": true, + "relativeTo": "any-base" + }, + { + "input": "?", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + "Last component looks like a number, but not valid IPv4", + { + "input": "http://1.2.3.4.5", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.4.5.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://0..0x300/", + "base": null, + "failure": true + }, + { + "input": "http://0..0x300./", + "base": null, + "failure": true + }, + { + "input": "http://256.256.256.256.256", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://256.256.256.256.256.", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://1.2.3.08", + "base": null, + "failure": true + }, + { + "input": "http://1.2.3.08.", + "base": null, + "failure": true + }, + { + "input": "http://1.2.3.09", + "base": null, + "failure": true + }, + { + "input": "http://09.2.3.4", + "base": null, + "failure": true + }, + { + "input": "http://09.2.3.4.", + "base": null, + "failure": true + }, + { 
+ "input": "http://01.2.3.4.5", + "base": null, + "failure": true + }, + { + "input": "http://01.2.3.4.5.", + "base": null, + "failure": true + }, + { + "input": "http://0x100.2.3.4", + "base": null, + "failure": true + }, + { + "input": "http://0x100.2.3.4.", + "base": null, + "failure": true + }, + { + "input": "http://0x1.2.3.4.5", + "base": null, + "failure": true + }, + { + "input": "http://0x1.2.3.4.5.", + "base": null, + "failure": true + }, + { + "input": "http://foo.1.2.3.4", + "base": null, + "failure": true + }, + { + "input": "http://foo.1.2.3.4.", + "base": null, + "failure": true + }, + { + "input": "http://foo.2.3.4", + "base": null, + "failure": true + }, + { + "input": "http://foo.2.3.4.", + "base": null, + "failure": true + }, + { + "input": "http://foo.09", + "base": null, + "failure": true + }, + { + "input": "http://foo.09.", + "base": null, + "failure": true + }, + { + "input": "http://foo.0x4", + "base": null, + "failure": true + }, + { + "input": "http://foo.0x4.", + "base": null, + "failure": true + }, + { + "input": "http://foo.09..", + "base": null, + "hash": "", + "host": "foo.09..", + "hostname": "foo.09..", + "href": "http://foo.09../", + "password": "", + "pathname": "/", + "port": "", + "protocol": "http:", + "search": "", + "username": "" + }, + { + "input": "http://0999999999999999999/", + "base": null, + "failure": true + }, + { + "input": "http://foo.0x", + "base": null, + "failure": true + }, + { + "input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123", + "base": null, + "failure": true + }, + { + "input": "http://💩.123/", + "base": null, + "failure": true + }, + "U+0000 and U+FFFF in various places", + { + "input": "https://\u0000y", + "base": null, + "failure": true + }, + { + "input": "https://x/\u0000y", + "base": null, + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/%00y", + "password": "", + "pathname": "/%00y", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": 
"https://x/?\u0000y", + "base": null, + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/?%00y", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "?%00y", + "username": "" + }, + { + "input": "https://x/?#\u0000y", + "base": null, + "hash": "#%00y", + "host": "x", + "hostname": "x", + "href": "https://x/?#%00y", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://￿y", + "base": null, + "failure": true + }, + { + "input": "https://x/￿y", + "base": null, + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/%EF%BF%BFy", + "password": "", + "pathname": "/%EF%BF%BFy", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://x/?￿y", + "base": null, + "hash": "", + "host": "x", + "hostname": "x", + "href": "https://x/?%EF%BF%BFy", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "?%EF%BF%BFy", + "username": "" + }, + { + "input": "https://x/?#￿y", + "base": null, + "hash": "#%EF%BF%BFy", + "host": "x", + "hostname": "x", + "href": "https://x/?#%EF%BF%BFy", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "non-special:\u0000y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:%00y", + "password": "", + "pathname": "%00y", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/\u0000y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/%00y", + "password": "", + "pathname": "x/%00y", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/?\u0000y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/?%00y", + "password": "", + "pathname": "x/", 
+ "port": "", + "protocol": "non-special:", + "search": "?%00y", + "username": "" + }, + { + "input": "non-special:x/?#\u0000y", + "base": null, + "hash": "#%00y", + "host": "", + "hostname": "", + "href": "non-special:x/?#%00y", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:￿y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:%EF%BF%BFy", + "password": "", + "pathname": "%EF%BF%BFy", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/￿y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/%EF%BF%BFy", + "password": "", + "pathname": "x/%EF%BF%BFy", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "non-special:x/?￿y", + "base": null, + "hash": "", + "host": "", + "hostname": "", + "href": "non-special:x/?%EF%BF%BFy", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "?%EF%BF%BFy", + "username": "" + }, + { + "input": "non-special:x/?#￿y", + "base": null, + "hash": "#%EF%BF%BFy", + "host": "", + "hostname": "", + "href": "non-special:x/?#%EF%BF%BFy", + "password": "", + "pathname": "x/", + "port": "", + "protocol": "non-special:", + "search": "", + "username": "" + }, + { + "input": "", + "base": null, + "failure": true, + "relativeTo": "non-opaque-path-base" + }, + { + "input": "https://example.com/\"quoted\"", + "base": null, + "hash": "", + "host": "example.com", + "hostname": "example.com", + "href": "https://example.com/%22quoted%22", + "origin": "https://example.com", + "password": "", + "pathname": "/%22quoted%22", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "input": "https://a%C2%ADb/", + "base": null, + "hash": "", + "host": "ab", + "hostname": "ab", + "href": "https://ab/", + "origin": 
"https://ab", + "password": "", + "pathname": "/", + "port": "", + "protocol": "https:", + "search": "", + "username": "" + }, + { + "comment": "Empty host after domain to ASCII", + "input": "https://­/", + "base": null, + "failure": true + }, + { + "input": "https://%C2%AD/", + "base": null, + "failure": true + }, + { + "input": "https://xn--/", + "base": null, + "failure": true + }, + "Non-special schemes that some implementations might incorrectly treat as special", + { + "input": "data://example.com:8080/pathname?search#hash", + "base": null, + "href": "data://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "data:///test", + "base": null, + "href": "data:///test", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "data://test/a/../b", + "base": null, + "href": "data://test/b", + "origin": "null", + "protocol": "data:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "data://:443", + "base": null, + "failure": true + }, + { + "input": "data://test:test", + "base": null, + "failure": true + }, + { + "input": "data://[:1]", + "base": null, + "failure": true + }, + { + "input": "javascript://example.com:8080/pathname?search#hash", + "base": null, + "href": "javascript://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "javascript:///test", + 
"base": null, + "href": "javascript:///test", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "javascript://test/a/../b", + "base": null, + "href": "javascript://test/b", + "origin": "null", + "protocol": "javascript:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "javascript://:443", + "base": null, + "failure": true + }, + { + "input": "javascript://test:test", + "base": null, + "failure": true + }, + { + "input": "javascript://[:1]", + "base": null, + "failure": true + }, + { + "input": "mailto://example.com:8080/pathname?search#hash", + "base": null, + "href": "mailto://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "mailto:///test", + "base": null, + "href": "mailto:///test", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "mailto://test/a/../b", + "base": null, + "href": "mailto://test/b", + "origin": "null", + "protocol": "mailto:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "mailto://:443", + "base": null, + "failure": true + }, + { + "input": "mailto://test:test", + "base": null, + "failure": true + }, + { + "input": "mailto://[:1]", + "base": null, + "failure": true + }, + { + "input": "intent://example.com:8080/pathname?search#hash", + "base": null, + "href": 
"intent://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "intent:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "intent:///test", + "base": null, + "href": "intent:///test", + "origin": "null", + "protocol": "intent:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "intent://test/a/../b", + "base": null, + "href": "intent://test/b", + "origin": "null", + "protocol": "intent:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "intent://:443", + "base": null, + "failure": true + }, + { + "input": "intent://test:test", + "base": null, + "failure": true + }, + { + "input": "intent://[:1]", + "base": null, + "failure": true + }, + { + "input": "urn://example.com:8080/pathname?search#hash", + "base": null, + "href": "urn://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "urn:///test", + "base": null, + "href": "urn:///test", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "urn://test/a/../b", + "base": null, + "href": "urn://test/b", + "origin": "null", + "protocol": "urn:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "urn://:443", + "base": null, + "failure": true + }, + { + "input": 
"urn://test:test", + "base": null, + "failure": true + }, + { + "input": "urn://[:1]", + "base": null, + "failure": true + }, + { + "input": "turn://example.com:8080/pathname?search#hash", + "base": null, + "href": "turn://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "turn:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "turn:///test", + "base": null, + "href": "turn:///test", + "origin": "null", + "protocol": "turn:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "turn://test/a/../b", + "base": null, + "href": "turn://test/b", + "origin": "null", + "protocol": "turn:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "turn://:443", + "base": null, + "failure": true + }, + { + "input": "turn://test:test", + "base": null, + "failure": true + }, + { + "input": "turn://[:1]", + "base": null, + "failure": true + }, + { + "input": "stun://example.com:8080/pathname?search#hash", + "base": null, + "href": "stun://example.com:8080/pathname?search#hash", + "origin": "null", + "protocol": "stun:", + "username": "", + "password": "", + "host": "example.com:8080", + "hostname": "example.com", + "port": "8080", + "pathname": "/pathname", + "search": "?search", + "hash": "#hash" + }, + { + "input": "stun:///test", + "base": null, + "href": "stun:///test", + "origin": "null", + "protocol": "stun:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/test", + "search": "", + "hash": "" + }, + { + "input": "stun://test/a/../b", + "base": null, + "href": "stun://test/b", + "origin": "null", + "protocol": "stun:", + "username": "", + 
"password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/b", + "search": "", + "hash": "" + }, + { + "input": "stun://:443", + "base": null, + "failure": true + }, + { + "input": "stun://test:test", + "base": null, + "failure": true + }, + { + "input": "stun://[:1]", + "base": null, + "failure": true + }, + { + "input": "w://x:0", + "base": null, + "href": "w://x:0", + "origin": "null", + "protocol": "w:", + "username": "", + "password": "", + "host": "x:0", + "hostname": "x", + "port": "0", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "west://x:0", + "base": null, + "href": "west://x:0", + "origin": "null", + "protocol": "west:", + "username": "", + "password": "", + "host": "x:0", + "hostname": "x", + "port": "0", + "pathname": "", + "search": "", + "hash": "" + }, + "Scheme relative path starting with multiple slashes", + { + "input": "///test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///\\//\\//test", + "base": "http://example.org/", + "href": "http://test/", + "protocol": "http:", + "username": "", + "password": "", + "host": "test", + "hostname": "test", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": 
"///example.org/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../", + "base": "http://example.org/", + "href": "http://example.org/", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "///example.org/../path/../../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "/\\/\\//example.org/../path", + "base": "http://example.org/", + "href": "http://example.org/path", + "protocol": "http:", + "username": "", + "password": "", + "host": "example.org", + "hostname": "example.org", + "port": "", + "pathname": "/path", + "search": "", + "hash": "" + }, + { + "input": "///abcdef/../", + "base": "file:///", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/\\//\\/a/../", + "base": "file:///", + "href": "file://////", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "////", + "search": "", + "hash": "" + }, + { + "input": "//a/../", + "base": "file:///", + "href": "file://a/", + "protocol": "file:", + "username": "", + "password": "", + "host": "a", + "hostname": "a", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + } +] diff --git a/packages/playground/wordpress/src/index.ts b/packages/playground/wordpress/src/index.ts index 
f1da40f2f72..d912d4d3240 100644 --- a/packages/playground/wordpress/src/index.ts +++ b/packages/playground/wordpress/src/index.ts @@ -148,7 +148,7 @@ export async function setupPlatformLevelMuPlugins(php: UniversalPHP) { * Check if the request is for the login page. */ if (is_login() && is_user_logged_in() && isset($_GET['redirect_to'])) { - wp_redirect(esc_url($_GET['redirect_to'])); + wp_redirect($_GET['redirect_to']); exit; } }, 1); From 819febdd2c5b1d68e9e33efc9dbec356a10c4291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= Date: Sun, 13 Oct 2024 21:14:12 +0200 Subject: [PATCH 03/31] Port WP_HTML_Processor et al. from WordPress --- .../playground/data-liberation/bootstrap.php | 30 +- ...ass-wp-html-active-formatting-elements.php | 187 + .../class-wp-html-attribute-token.php | 116 + .../src/wordpress/class-wp-html-decoder.php | 461 +++ .../wordpress/class-wp-html-open-elements.php | 462 +++ .../class-wp-html-processor-state.php | 143 + .../src/wordpress/class-wp-html-processor.php | 2008 ++++++++++ .../src/wordpress/class-wp-html-span.php | 56 + .../wordpress/class-wp-html-tag-processor.php | 3550 +++++++++++++++++ .../class-wp-html-text-replacement.php | 64 + .../src/wordpress/class-wp-html-token.php | 106 + .../class-wp-html-unsupported-exception.php | 31 + .../src/wordpress/class-wp-token-map.php | 818 ++++ .../html5-named-character-references.php | 1313 ++++++ 14 files changed, 9330 insertions(+), 15 deletions(-) create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-active-formatting-elements.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-attribute-token.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-decoder.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-open-elements.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-processor-state.php create mode 100644 
packages/playground/data-liberation/src/wordpress/class-wp-html-processor.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-span.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-tag-processor.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-text-replacement.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-token.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-html-unsupported-exception.php create mode 100644 packages/playground/data-liberation/src/wordpress/class-wp-token-map.php create mode 100644 packages/playground/data-liberation/src/wordpress/html5-named-character-references.php diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 11f2649f77c..717954111ce 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -1,20 +1,20 @@ Initially, the list of active formatting elements is empty. + * > It is used to handle mis-nested formatting element tags. + * > + * > The list contains elements in the formatting category, and markers. + * > The markers are inserted when entering applet, object, marquee, + * > template, td, th, and caption elements, and are used to prevent + * > formatting from "leaking" into applet, object, marquee, template, + * > td, th, and caption elements. + * > + * > In addition, each element in the list of active formatting elements + * > is associated with the token for which it was created, so that + * > further elements can be created for that token if necessary. + * + * @since 6.4.0 + * + * @access private + * + * @see https://html.spec.whatwg.org/#list-of-active-formatting-elements + * @see WP_HTML_Processor + */ +class WP_HTML_Active_Formatting_Elements { + /** + * Holds the stack of active formatting element references. 
+ * + * @since 6.4.0 + * + * @var WP_HTML_Token[] + */ + private $stack = array(); + + /** + * Reports if a specific node is in the stack of active formatting elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token Look for this node in the stack. + * @return bool Whether the referenced node is in the stack of active formatting elements. + */ + public function contains_node( $token ) { + foreach ( $this->walk_up() as $item ) { + if ( $token->bookmark_name === $item->bookmark_name ) { + return true; + } + } + + return false; + } + + /** + * Returns how many nodes are currently in the stack of active formatting elements. + * + * @since 6.4.0 + * + * @return int How many node are in the stack of active formatting elements. + */ + public function count() { + return count( $this->stack ); + } + + /** + * Returns the node at the end of the stack of active formatting elements, + * if one exists. If the stack is empty, returns null. + * + * @since 6.4.0 + * + * @return WP_HTML_Token|null Last node in the stack of active formatting elements, if one exists, otherwise null. + */ + public function current_node() { + $current_node = end( $this->stack ); + + return $current_node ? $current_node : null; + } + + /** + * Pushes a node onto the stack of active formatting elements. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements + * + * @param WP_HTML_Token $token Push this node onto the stack. + */ + public function push( $token ) { + /* + * > If there are already three elements in the list of active formatting elements after the last marker, + * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and + * > attributes as element, then remove the earliest such element from the list of active formatting + * > elements. 
For these purposes, the attributes must be compared as they were when the elements were + * > created by the parser; two elements have the same attributes if all their parsed attributes can be + * > paired such that the two attributes in each pair have identical names, namespaces, and values + * > (the order of the attributes does not matter). + * + * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. + */ + // > Add element to the list of active formatting elements. + $this->stack[] = $token; + } + + /** + * Removes a node from the stack of active formatting elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token Remove this node from the stack, if it's there already. + * @return bool Whether the node was found and removed from the stack of active formatting elements. + */ + public function remove_node( $token ) { + foreach ( $this->walk_up() as $position_from_end => $item ) { + if ( $token->bookmark_name !== $item->bookmark_name ) { + continue; + } + + $position_from_start = $this->count() - $position_from_end - 1; + array_splice( $this->stack, $position_from_start, 1 ); + return true; + } + + return false; + } + + /** + * Steps through the stack of active formatting elements, starting with the + * top element (added first) and walking downwards to the one added last. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = 'We are here'; + * foreach ( $stack->walk_down() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > EM -> STRONG -> A -> + * + * To start with the most-recently added element and walk towards the top, + * see WP_HTML_Active_Formatting_Elements::walk_up(). 
+ * + * @since 6.4.0 + */ + public function walk_down() { + $count = count( $this->stack ); + + for ( $i = 0; $i < $count; $i++ ) { + yield $this->stack[ $i ]; + } + } + + /** + * Steps through the stack of active formatting elements, starting with the + * bottom element (added last) and walking upwards to the one added first. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = 'We are here'; + * foreach ( $stack->walk_up() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > A -> STRONG -> EM -> + * + * To start with the first added element and walk towards the bottom, + * see WP_HTML_Active_Formatting_Elements::walk_down(). + * + * @since 6.4.0 + */ + public function walk_up() { + for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { + yield $this->stack[ $i ]; + } + } +} diff --git a/packages/playground/data-liberation/src/wordpress/class-wp-html-attribute-token.php b/packages/playground/data-liberation/src/wordpress/class-wp-html-attribute-token.php new file mode 100644 index 00000000000..74d41320b1c --- /dev/null +++ b/packages/playground/data-liberation/src/wordpress/class-wp-html-attribute-token.php @@ -0,0 +1,116 @@ + + * ------------ length is 12, including quotes + * + * + * ------- length is 6 + * + * + * ------------ length is 11 + * + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. + * + * @var int + */ + public $length; + + /** + * Whether the attribute is a boolean attribute with value `true`. + * + * @since 6.2.0 + * + * @var bool + */ + public $is_true; + + /** + * Constructor. + * + * @since 6.2.0 + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. + * + * @param string $name Attribute name. + * @param int $value_start Attribute value. + * @param int $value_length Number of bytes attribute value spans. + * @param int $start The string offset where the attribute name starts. 
+ * @param int $length Byte length of the entire attribute name or name and value pair expression. + * @param bool $is_true Whether the attribute is a boolean attribute with true value. + */ + public function __construct( $name, $value_start, $value_length, $start, $length, $is_true ) { + $this->name = $name; + $this->value_starts_at = $value_start; + $this->value_length = $value_length; + $this->start = $start; + $this->length = $length; + $this->is_true = $is_true; + } +} diff --git a/packages/playground/data-liberation/src/wordpress/class-wp-html-decoder.php b/packages/playground/data-liberation/src/wordpress/class-wp-html-decoder.php new file mode 100644 index 00000000000..78976002b4a --- /dev/null +++ b/packages/playground/data-liberation/src/wordpress/class-wp-html-decoder.php @@ -0,0 +1,461 @@ += $end ) { + break; + } + + $character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $token_length ); + if ( isset( $character_reference ) ) { + $at = $next_character_reference_at; + $decoded .= substr( $text, $was_at, $at - $was_at ); + $decoded .= $character_reference; + $at += $token_length; + $was_at = $at; + continue; + } + + ++$at; + } + + if ( 0 === $was_at ) { + return $text; + } + + if ( $was_at < $end ) { + $decoded .= substr( $text, $was_at, $end - $was_at ); + } + + return $decoded; + } + + /** + * Attempt to read a character reference at the given location in a given string, + * depending on the context in which it's found. + * + * If a character reference is found, this function will return the translated value + * that the reference maps to. It will then set `$match_byte_length` the + * number of bytes of input it read while consuming the character reference. This + * gives calling code the opportunity to advance its cursor when traversing a string + * and decoding. 
+ * + * Example: + * + * null === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships…', 0 ); + * '…' === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships…', 5, $token_length ); + * 8 === $token_length; // `…` + * + * null === WP_HTML_Decoder::read_character_reference( 'attribute', '¬in', 0 ); + * '∉' === WP_HTML_Decoder::read_character_reference( 'attribute', '∉', 0, $token_length ); + * 7 === $token_length; // `∉` + * + * '¬' === WP_HTML_Decoder::read_character_reference( 'data', '¬in', 0, $token_length ); + * 4 === $token_length; // `¬` + * '∉' === WP_HTML_Decoder::read_character_reference( 'data', '∉', 0, $token_length ); + * 7 === $token_length; // `∉` + * + * @since 6.6.0 + * + * @param string $context `attribute` for decoding attribute values, `data` otherwise. + * @param string $text Text document containing span of text to decode. + * @param int $at Optional. Byte offset into text where span begins, defaults to the beginning (0). + * @param int &$match_byte_length Optional. Set to byte-length of character reference if provided and if a match + * is found, otherwise not set. Default null. + * @return string|false Decoded character reference in UTF-8 if found, otherwise `false`. + */ + public static function read_character_reference( $context, $text, $at = 0, &$match_byte_length = null ) { + /** + * Mappings for HTML5 named character references. + * + * @var WP_Token_Map $html5_named_character_references + */ + global $html5_named_character_references; + + $length = strlen( $text ); + if ( $at + 1 >= $length ) { + return null; + } + + if ( '&' !== $text[ $at ] ) { + return null; + } + + /* + * Numeric character references. + * + * When truncated, these will encode the code point found by parsing the + * digits that are available. For example, when `🅰` is truncated + * to `DZ` it will encode `DZ`. It does not: + * - know how to parse the original `🅰`. + * - fail to parse and return plaintext `DZ`. 
+ * - fail to parse and return the replacement character `�` + */ + if ( '#' === $text[ $at + 1 ] ) { + if ( $at + 2 >= $length ) { + return null; + } + + /** Tracks inner parsing within the numeric character reference. */ + $digits_at = $at + 2; + + if ( 'x' === $text[ $digits_at ] || 'X' === $text[ $digits_at ] ) { + $numeric_base = 16; + $numeric_digits = '0123456789abcdefABCDEF'; + $max_digits = 6; // 􏿿 + ++$digits_at; + } else { + $numeric_base = 10; + $numeric_digits = '0123456789'; + $max_digits = 7; // 􏿿 + } + + // Cannot encode invalid Unicode code points. Max is to U+10FFFF. + $zero_count = strspn( $text, '0', $digits_at ); + $digit_count = strspn( $text, $numeric_digits, $digits_at + $zero_count ); + $after_digits = $digits_at + $zero_count + $digit_count; + $has_semicolon = $after_digits < $length && ';' === $text[ $after_digits ]; + $end_of_span = $has_semicolon ? $after_digits + 1 : $after_digits; + + // `&#` or `&#x` without digits returns into plaintext. + if ( 0 === $digit_count && 0 === $zero_count ) { + return null; + } + + // Whereas `&#` and only zeros is invalid. + if ( 0 === $digit_count ) { + $match_byte_length = $end_of_span - $at; + return '�'; + } + + // If there are too many digits then it's not worth parsing. It's invalid. + if ( $digit_count > $max_digits ) { + $match_byte_length = $end_of_span - $at; + return '�'; + } + + $digits = substr( $text, $digits_at + $zero_count, $digit_count ); + $code_point = intval( $digits, $numeric_base ); + + /* + * Noncharacters, 0x0D, and non-ASCII-whitespace control characters. 
+ * + * > A noncharacter is a code point that is in the range U+FDD0 to U+FDEF, + * > inclusive, or U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, + * > U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, + * > U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, + * > U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE, + * > U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, or U+10FFFF. + * + * A C0 control is a code point that is in the range of U+00 to U+1F, + * but ASCII whitespace includes U+09, U+0A, U+0C, and U+0D. + * + * These characters are invalid but still decode as any valid character. + * This comment is here to note and explain why there's no check to + * remove these characters or replace them. + * + * @see https://infra.spec.whatwg.org/#noncharacter + */ + + /* + * Code points in the C1 controls area need to be remapped as if they + * were stored in Windows-1252. Note! This transformation only happens + * for numeric character references. The raw code points in the byte + * stream are not translated. + * + * > If the number is one of the numbers in the first column of + * > the following table, then find the row with that number in + * > the first column, and set the character reference code to + * > the number in the second column of that row. + */ + if ( $code_point >= 0x80 && $code_point <= 0x9F ) { + $windows_1252_mapping = array( + 0x20AC, // 0x80 -> EURO SIGN (€). + 0x81, // 0x81 -> (no change). + 0x201A, // 0x82 -> SINGLE LOW-9 QUOTATION MARK (‚). + 0x0192, // 0x83 -> LATIN SMALL LETTER F WITH HOOK (ƒ). + 0x201E, // 0x84 -> DOUBLE LOW-9 QUOTATION MARK („). + 0x2026, // 0x85 -> HORIZONTAL ELLIPSIS (…). + 0x2020, // 0x86 -> DAGGER (†). + 0x2021, // 0x87 -> DOUBLE DAGGER (‡). + 0x02C6, // 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT (ˆ). + 0x2030, // 0x89 -> PER MILLE SIGN (‰). + 0x0160, // 0x8A -> LATIN CAPITAL LETTER S WITH CARON (Š). + 0x2039, // 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK (‹). 
+ 0x0152, // 0x8C -> LATIN CAPITAL LIGATURE OE (Œ). + 0x8D, // 0x8D -> (no change). + 0x017D, // 0x8E -> LATIN CAPITAL LETTER Z WITH CARON (Ž). + 0x8F, // 0x8F -> (no change). + 0x90, // 0x90 -> (no change). + 0x2018, // 0x91 -> LEFT SINGLE QUOTATION MARK (‘). + 0x2019, // 0x92 -> RIGHT SINGLE QUOTATION MARK (’). + 0x201C, // 0x93 -> LEFT DOUBLE QUOTATION MARK (“). + 0x201D, // 0x94 -> RIGHT DOUBLE QUOTATION MARK (”). + 0x2022, // 0x95 -> BULLET (•). + 0x2013, // 0x96 -> EN DASH (–). + 0x2014, // 0x97 -> EM DASH (—). + 0x02DC, // 0x98 -> SMALL TILDE (˜). + 0x2122, // 0x99 -> TRADE MARK SIGN (™). + 0x0161, // 0x9A -> LATIN SMALL LETTER S WITH CARON (š). + 0x203A, // 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (›). + 0x0153, // 0x9C -> LATIN SMALL LIGATURE OE (œ). + 0x9D, // 0x9D -> (no change). + 0x017E, // 0x9E -> LATIN SMALL LETTER Z WITH CARON (ž). + 0x0178, // 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS (Ÿ). + ); + + $code_point = $windows_1252_mapping[ $code_point - 0x80 ]; + } + + $match_byte_length = $end_of_span - $at; + return self::code_point_to_utf8_bytes( $code_point ); + } + + /** Tracks inner parsing within the named character reference. */ + $name_at = $at + 1; + // Minimum named character reference is two characters. E.g. `GT`. + if ( $name_at + 2 > $length ) { + return null; + } + + $name_length = 0; + $replacement = $html5_named_character_references->read_token( $text, $name_at, $name_length ); + if ( false === $replacement ) { + return null; + } + + $after_name = $name_at + $name_length; + + // If the match ended with a semicolon then it should always be decoded. + if ( ';' === $text[ $name_at + $name_length - 1 ] ) { + $match_byte_length = $after_name - $at; + return $replacement; + } + + /* + * At this point though there's a match for an entry in the named + * character reference table but the match doesn't end in `;`. + * It may be allowed if it's followed by something unambiguous. 
+ */ + $ambiguous_follower = ( + $after_name < $length && + $name_at < $length && + ( + ctype_alnum( $text[ $after_name ] ) || + '=' === $text[ $after_name ] + ) + ); + + // It's non-ambiguous, safe to leave it in. + if ( ! $ambiguous_follower ) { + $match_byte_length = $after_name - $at; + return $replacement; + } + + // It's ambiguous, which isn't allowed inside attributes. + if ( 'attribute' === $context ) { + return null; + } + + $match_byte_length = $after_name - $at; + return $replacement; + } + + /** + * Encode a code point number into the UTF-8 encoding. + * + * This encoder implements the UTF-8 encoding algorithm for converting + * a code point into a byte sequence. If it receives an invalid code + * point it will return the Unicode Replacement Character U+FFFD `�`. + * + * Example: + * + * '🅰' === WP_HTML_Decoder::code_point_to_utf8_bytes( 0x1f170 ); + * + * // Half of a surrogate pair is an invalid code point. + * '�' === WP_HTML_Decoder::code_point_to_utf8_bytes( 0xd83c ); + * + * @since 6.6.0 + * + * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard. + * + * @param int $code_point Which code point to convert. + * @return string Converted code point, or `�` if invalid. + */ + public static function code_point_to_utf8_bytes( $code_point ) { + // Pre-check to ensure a valid code point. + if ( + $code_point <= 0 || + ( $code_point >= 0xD800 && $code_point <= 0xDFFF ) || + $code_point > 0x10FFFF + ) { + return '�'; + } + + if ( $code_point <= 0x7F ) { + return chr( $code_point ); + } + + if ( $code_point <= 0x7FF ) { + $byte1 = ( $code_point >> 6 ) | 0xC0; + $byte2 = $code_point & 0x3F | 0x80; + + return pack( 'CC', $byte1, $byte2 ); + } + + if ( $code_point <= 0xFFFF ) { + $byte1 = ( $code_point >> 12 ) | 0xE0; + $byte2 = ( $code_point >> 6 ) & 0x3F | 0x80; + $byte3 = $code_point & 0x3F | 0x80; + + return pack( 'CCC', $byte1, $byte2, $byte3 ); + } + + // Any values above U+10FFFF are eliminated above in the pre-check. 
+ $byte1 = ( $code_point >> 18 ) | 0xF0; + $byte2 = ( $code_point >> 12 ) & 0x3F | 0x80; + $byte3 = ( $code_point >> 6 ) & 0x3F | 0x80; + $byte4 = $code_point & 0x3F | 0x80; + + return pack( 'CCCC', $byte1, $byte2, $byte3, $byte4 ); + } +} diff --git a/packages/playground/data-liberation/src/wordpress/class-wp-html-open-elements.php b/packages/playground/data-liberation/src/wordpress/class-wp-html-open-elements.php new file mode 100644 index 00000000000..1234abcb9df --- /dev/null +++ b/packages/playground/data-liberation/src/wordpress/class-wp-html-open-elements.php @@ -0,0 +1,462 @@ + Initially, the stack of open elements is empty. The stack grows + * > downwards; the topmost node on the stack is the first one added + * > to the stack, and the bottommost node of the stack is the most + * > recently added node in the stack (notwithstanding when the stack + * > is manipulated in a random access fashion as part of the handling + * > for misnested tags). + * + * @since 6.4.0 + * + * @access private + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * @see WP_HTML_Processor + */ +class WP_HTML_Open_Elements { + /** + * Holds the stack of open element references. + * + * @since 6.4.0 + * + * @var WP_HTML_Token[] + */ + public $stack = array(); + + /** + * Whether a P element is in button scope currently. + * + * This class optimizes scope lookup by pre-calculating + * this value when elements are added and removed to the + * stack of open elements which might change its value. + * This avoids frequent iteration over the stack. + * + * @since 6.4.0 + * + * @var bool + */ + private $has_p_in_button_scope = false; + + /** + * Reports if a specific node is in the stack of open elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token Look for this node in the stack. + * @return bool Whether the referenced node is in the stack of open elements. 
+ */ + public function contains_node( $token ) { + foreach ( $this->walk_up() as $item ) { + if ( $token->bookmark_name === $item->bookmark_name ) { + return true; + } + } + + return false; + } + + /** + * Returns how many nodes are currently in the stack of open elements. + * + * @since 6.4.0 + * + * @return int How many node are in the stack of open elements. + */ + public function count() { + return count( $this->stack ); + } + + /** + * Returns the node at the end of the stack of open elements, + * if one exists. If the stack is empty, returns null. + * + * @since 6.4.0 + * + * @return WP_HTML_Token|null Last node in the stack of open elements, if one exists, otherwise null. + */ + public function current_node() { + $current_node = end( $this->stack ); + + return $current_node ? $current_node : null; + } + + /** + * Returns whether an element is in a specific scope. + * + * ## HTML Support + * + * This function skips checking for the termination list because there + * are no supported elements which appear in the termination list. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-the-specific-scope + * + * @param string $tag_name Name of tag check. + * @param string[] $termination_list List of elements that terminate the search. + * @return bool Whether the element was found in a specific scope. + */ + public function has_element_in_specific_scope( $tag_name, $termination_list ) { + foreach ( $this->walk_up() as $node ) { + if ( $node->node_name === $tag_name ) { + return true; + } + + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + + switch ( $node->node_name ) { + case 'HTML': + return false; + } + + if ( in_array( $node->node_name, $termination_list, true ) ) { + return false; + } + } + + return false; + } + + /** + * Returns whether a particular element is in scope. 
+ * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_scope( $tag_name ) { + return $this->has_element_in_specific_scope( + $tag_name, + array( + + /* + * Because it's not currently possible to encounter + * one of the termination elements, they don't need + * to be listed here. If they were, they would be + * unreachable and only waste CPU cycles while + * scanning through HTML. + */ + ) + ); + } + + /** + * Returns whether a particular element is in list item scope. + * + * @since 6.4.0 + * @since 6.5.0 Implemented: no longer throws on every invocation. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_list_item_scope( $tag_name ) { + return $this->has_element_in_specific_scope( + $tag_name, + array( + // There are more elements that belong here which aren't currently supported. + 'OL', + 'UL', + ) + ); + } + + /** + * Returns whether a particular element is in button scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_button_scope( $tag_name ) { + return $this->has_element_in_specific_scope( $tag_name, array( 'BUTTON' ) ); + } + + /** + * Returns whether a particular element is in table scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope + * + * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. 
+ */ + public function has_element_in_table_scope( $tag_name ) { + throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' ); + + return false; // The linter requires this unreachable code until the function is implemented and can return. + } + + /** + * Returns whether a particular element is in select scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-select-scope + * + * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. + * + * @param string $tag_name Name of tag to check. + * @return bool Whether given element is in scope. + */ + public function has_element_in_select_scope( $tag_name ) { + throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on select scope.' ); + + return false; // The linter requires this unreachable code until the function is implemented and can return. + } + + /** + * Returns whether a P is in BUTTON scope. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope + * + * @return bool Whether a P is in BUTTON scope. + */ + public function has_p_in_button_scope() { + return $this->has_p_in_button_scope; + } + + /** + * Pops a node off of the stack of open elements. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * + * @return bool Whether a node was popped off of the stack. + */ + public function pop() { + $item = array_pop( $this->stack ); + + if ( null === $item ) { + return false; + } + + $this->after_element_pop( $item ); + return true; + } + + /** + * Pops nodes off of the stack of open elements until one with the given tag name has been popped. + * + * @since 6.4.0 + * + * @see WP_HTML_Open_Elements::pop + * + * @param string $tag_name Name of tag that needs to be popped off of the stack of open elements. + * @return bool Whether a tag of the given name was found and popped off of the stack of open elements. 
+ */ + public function pop_until( $tag_name ) { + foreach ( $this->walk_up() as $item ) { + $this->pop(); + + if ( + '(internal: H1 through H6 - do not use)' === $tag_name && + in_array( $item->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + ) { + return true; + } + + if ( $tag_name === $item->node_name ) { + return true; + } + } + + return false; + } + + /** + * Pushes a node onto the stack of open elements. + * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#stack-of-open-elements + * + * @param WP_HTML_Token $stack_item Item to add onto stack. + */ + public function push( $stack_item ) { + $this->stack[] = $stack_item; + $this->after_element_push( $stack_item ); + } + + /** + * Removes a specific node from the stack of open elements. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $token The node to remove from the stack of open elements. + * @return bool Whether the node was found and removed from the stack of open elements. + */ + public function remove_node( $token ) { + foreach ( $this->walk_up() as $position_from_end => $item ) { + if ( $token->bookmark_name !== $item->bookmark_name ) { + continue; + } + + $position_from_start = $this->count() - $position_from_end - 1; + array_splice( $this->stack, $position_from_start, 1 ); + $this->after_element_pop( $item ); + return true; + } + + return false; + } + + + /** + * Steps through the stack of open elements, starting with the top element + * (added first) and walking downwards to the one added last. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = '<em><strong><a>We are here'; + * foreach ( $stack->walk_down() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > EM -> STRONG -> A -> + * + * To start with the most-recently added element and walk towards the top, + * see WP_HTML_Open_Elements::walk_up().
+ * + * @since 6.4.0 + */ + public function walk_down() { + $count = count( $this->stack ); + + for ( $i = 0; $i < $count; $i++ ) { + yield $this->stack[ $i ]; + } + } + + /** + * Steps through the stack of open elements, starting with the bottom element + * (added last) and walking upwards to the one added first. + * + * This generator function is designed to be used inside a "foreach" loop. + * + * Example: + * + * $html = '<em><strong><a>We are here'; + * foreach ( $stack->walk_up() as $node ) { + * echo "{$node->node_name} -> "; + * } + * > A -> STRONG -> EM -> + * + * To start with the first added element and walk towards the bottom, + * see WP_HTML_Open_Elements::walk_down(). + * + * @since 6.4.0 + * @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists. + * + * @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists. + */ + public function walk_up( $above_this_node = null ) { + $has_found_node = null === $above_this_node; + + for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { + $node = $this->stack[ $i ]; + + if ( ! $has_found_node ) { + $has_found_node = $node === $above_this_node; + continue; + } + + yield $node; + } + } + + /* + * Internal helpers. + */ + + /** + * Updates internal flags after adding an element. + * + * Certain conditions (such as "has_p_in_button_scope") are maintained here as + * flags that are only modified when adding and removing elements. This allows + * the HTML Processor to quickly check for these conditions instead of iterating + * over the open stack elements upon each new tag it encounters. These flags, + * however, need to be maintained as items are added and removed from the stack. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $item Element that was added to the stack of open elements.
+ */ + public function after_element_push( $item ) { + /* + * When adding support for new elements, expand this switch to trap + * cases where the precalculated value needs to change. + */ + switch ( $item->node_name ) { + case 'BUTTON': + $this->has_p_in_button_scope = false; + break; + + case 'P': + $this->has_p_in_button_scope = true; + break; + } + } + + /** + * Updates internal flags after removing an element. + * + * Certain conditions (such as "has_p_in_button_scope") are maintained here as + * flags that are only modified when adding and removing elements. This allows + * the HTML Processor to quickly check for these conditions instead of iterating + * over the open stack elements upon each new tag it encounters. These flags, + * however, need to be maintained as items are added and removed from the stack. + * + * @since 6.4.0 + * + * @param WP_HTML_Token $item Element that was removed from the stack of open elements. + */ + public function after_element_pop( $item ) { + /* + * When adding support for new elements, expand this switch to trap + * cases where the precalculated value needs to change. + */ + switch ( $item->node_name ) { + case 'BUTTON': + $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); + break; + + case 'P': + $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); + break; + } + } +} diff --git a/packages/playground/data-liberation/src/wordpress/class-wp-html-processor-state.php b/packages/playground/data-liberation/src/wordpress/class-wp-html-processor-state.php new file mode 100644 index 00000000000..9cf10c34410 --- /dev/null +++ b/packages/playground/data-liberation/src/wordpress/class-wp-html-processor-state.php @@ -0,0 +1,143 @@ + The frameset-ok flag is set to "ok" when the parser is created. It is set to "not ok" after certain tokens are seen. 
+ * + * @since 6.4.0 + * + * @see https://html.spec.whatwg.org/#frameset-ok-flag + * + * @var bool + */ + public $frameset_ok = true; + + /** + * Constructor - creates a new and empty state value. + * + * @since 6.4.0 + * + * @see WP_HTML_Processor + */ + public function __construct() { + $this->stack_of_open_elements = new WP_HTML_Open_Elements(); + $this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); + } +} diff --git a/packages/playground/data-liberation/src/wordpress/class-wp-html-processor.php b/packages/playground/data-liberation/src/wordpress/class-wp-html-processor.php new file mode 100644 index 00000000000..c2349d8a28e --- /dev/null +++ b/packages/playground/data-liberation/src/wordpress/class-wp-html-processor.php @@ -0,0 +1,2008 @@ +next_tag( array( 'breadcrumbs' => array( 'DIV', 'FIGURE', 'IMG' ) ) ) ) { + * $processor->add_class( 'responsive-image' ); + * } + * + * #### Breadcrumbs + * + * Breadcrumbs represent the stack of open elements from the root + * of the document or fragment down to the currently-matched node, + * if one is currently selected. Call WP_HTML_Processor::get_breadcrumbs() + * to inspect the breadcrumbs for a matched tag. + * + * Breadcrumbs can specify nested HTML structure and are equivalent + * to a CSS selector comprising tag names separated by the child + * combinator, such as "DIV > FIGURE > IMG". + * + * Since all elements find themselves inside a full HTML document + * when parsed, the return value from `get_breadcrumbs()` will always + * contain any implicit outermost elements. For example, when parsing + * with `create_fragment()` in the `BODY` context (the default), any + * tag in the given HTML document will contain `array( 'HTML', 'BODY', … )` + * in its breadcrumbs. + * + * Despite containing the implied outermost elements in their breadcrumbs, + * tags may be found with the shortest-matching breadcrumb query. 
That is, + * `array( 'IMG' )` matches all IMG elements and `array( 'P', 'IMG' )` + * matches all IMG elements directly inside a P element. To ensure that no + * partial matches erroneously match it's possible to specify in a query + * the full breadcrumb match all the way down from the root HTML element. + * + * Example: + * + * $html = '<figure><img><figcaption>A <em>lovely</em> day outside</figcaption></figure>'; + * // ----- Matches here. + * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG' ) ) ); + * + * $html = '<figure><img><figcaption>A <em>lovely</em> day outside</figcaption></figure>'; + * // ---- Matches here. + * $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'FIGCAPTION', 'EM' ) ) ); + * + * $html = '<div><img></div><img>
'; + * // ----- Matches here, because IMG must be a direct child of the implicit BODY. + * $processor->next_tag( array( 'breadcrumbs' => array( 'BODY', 'IMG' ) ) ); + * + * ## HTML Support + * + * This class implements a small part of the HTML5 specification. + * It's designed to operate within its support and abort early whenever + * encountering circumstances it can't properly handle. This is + * the principal way in which this class remains as simple as possible + * without cutting corners and breaking compliance. + * + * ### Supported elements + * + * If any unsupported element appears in the HTML input the HTML Processor + * will abort early and stop all processing. This draconian measure ensures + * that the HTML Processor won't break any HTML it doesn't fully understand. + * + * The following list specifies the HTML tags that _are_ supported: + * + * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Custom elements: All custom elements are supported. :) + * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH. + * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR. + * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. + * - Links: A. + * - Lists: DD, DL, DT, LI, OL, UL. + * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO. + * - Paragraph: BR, P. + * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. + * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION. + * - Templating elements: SLOT. + * - Text decoration: RUBY. + * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER. + * + * ### Supported markup + * + * Some kinds of non-normative HTML involve reconstruction of formatting elements and + * re-parenting of mis-nested elements.
For example, a DIV tag found inside a TABLE + * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters + * such a case it will stop processing. + * + * The following list specifies HTML markup that _is_ supported: + * + * - Markup involving only those tags listed above. + * - Fully-balanced and non-overlapping tags. + * - HTML with unexpected tag closers. + * - Some unbalanced or overlapping tags. + * - P tags after unclosed P tags. + * - BUTTON tags after unclosed BUTTON tags. + * - A tags after unclosed A tags that don't involve any active formatting elements. + * + * @since 6.4.0 + * + * @see WP_HTML_Tag_Processor + * @see https://html.spec.whatwg.org/ + */ +class WP_HTML_Processor extends WP_HTML_Tag_Processor { + /** + * The maximum number of bookmarks allowed to exist at any given time. + * + * HTML processing requires more bookmarks than basic tag processing, + * so this class constant from the Tag Processor is overwritten. + * + * @since 6.4.0 + * + * @var int + */ + const MAX_BOOKMARKS = 100; + + /** + * Holds the working state of the parser, including the stack of + * open elements and the stack of active formatting elements. + * + * Initialized in the constructor. + * + * @since 6.4.0 + * + * @var WP_HTML_Processor_State + */ + private $state = null; + + /** + * Used to create unique bookmark names. + * + * This class sets a bookmark for every tag in the HTML document that it encounters. + * The bookmark name is auto-generated and increments, starting with `1`. These are + * internal bookmarks and are automatically released when the referring WP_HTML_Token + * goes out of scope and is garbage-collected. + * + * @since 6.4.0 + * + * @see WP_HTML_Processor::$release_internal_bookmark_on_destruct + * + * @var int + */ + private $bookmark_counter = 0; + + /** + * Stores an explanation for why something failed, if it did. 
+ * + * @see self::get_last_error + * + * @since 6.4.0 + * + * @var string|null + */ + private $last_error = null; + + /** + * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. + * + * This function is created inside the class constructor so that it can be passed to + * the stack of open elements and the stack of active formatting elements without + * exposing it as a public method on the class. + * + * @since 6.4.0 + * + * @var Closure + */ + private $release_internal_bookmark_on_destruct = null; + + /* + * Public Interface Functions + */ + + /** + * Creates an HTML processor in the fragment parsing mode. + * + * Use this for cases where you are processing chunks of HTML that + * will be found within a bigger HTML document, such as rendered + * block output that exists within a post, `the_content` inside a + * rendered site layout. + * + * Fragment parsing occurs within a context, which is an HTML element + * that the document will eventually be placed in. It becomes important + * when special elements have different rules than others, such as inside + * a TEXTAREA or a TITLE tag where things that look like tags are text, + * or inside a SCRIPT tag where things that look like HTML syntax are JS. + * + * The context value should be a representation of the tag into which the + * HTML is found. For most cases this will be the body element. The HTML + * form is provided because a context element may have attributes that + * impact the parse, such as with a SCRIPT tag and its `type` attribute. + * + * ## Current HTML Support + * + * - The only supported context is `<body>`, which is the default value. + * - The only supported document encoding is `UTF-8`, which is the default value. + * + * @since 6.4.0 + * + * @param string $html Input HTML fragment to process. + * @param string $context Context element for the fragment, must be default of `<body>`. + * @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
+ * @return WP_HTML_Processor|null The created processor if successful, otherwise null. + */ + public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) { + if ( '<body>' !== $context || 'UTF-8' !== $encoding ) { + return null; + } + + $processor = new self( $html, self::CONSTRUCTOR_UNLOCK_CODE ); + $processor->state->context_node = array( 'BODY', array() ); + $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + + // @todo Create "fake" bookmarks for non-existent but implied nodes. + $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); + $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); + + $processor->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'root-node', + 'HTML', + false + ) + ); + + $processor->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'context-node', + $processor->state->context_node[0], + false + ) + ); + + return $processor; + } + + /** + * Constructor. + * + * Do not use this method. Use the static creator methods instead. + * + * @access private + * + * @since 6.4.0 + * + * @see WP_HTML_Processor::create_fragment() + * + * @param string $html HTML to process. + * @param string|null $use_the_static_create_methods_instead This constructor should not be called manually. + */ + public function __construct( $html, $use_the_static_create_methods_instead = null ) { + parent::__construct( $html ); + + if ( self::CONSTRUCTOR_UNLOCK_CODE !== $use_the_static_create_methods_instead ) { + _doing_it_wrong( + __METHOD__, + sprintf( + /* translators: %s: WP_HTML_Processor::create_fragment(). */ + __( 'Call %s to create an HTML Processor instead of calling the constructor directly.' ), + 'WP_HTML_Processor::create_fragment()' + ), + '6.4.0' + ); + } + + $this->state = new WP_HTML_Processor_State(); + + /* + * Create this wrapper so that it's possible to pass + * a private method into WP_HTML_Token classes without + * exposing it to any public API.
+ */ + $this->release_internal_bookmark_on_destruct = function ( $name ) { + parent::release_bookmark( $name ); + }; + } + + /** + * Returns the last error, if any. + * + * Various situations lead to parsing failure but this class will + * return `false` in all those cases. To determine why something + * failed it's possible to request the last error. This can be + * helpful to know to distinguish whether a given tag couldn't + * be found or if content in the document caused the processor + * to give up and abort processing. + * + * Example + * + * $processor = WP_HTML_Processor::create_fragment( '