diff --git a/.babelrc b/.babelrc
index 9508d58d6..131b39a30 100644
--- a/.babelrc
+++ b/.babelrc
@@ -1,5 +1,13 @@
{
- "presets": ["@babel/preset-env"],
+ "presets": [
+ [
+ "@babel/preset-env",
+ {
+ "useBuiltIns": "usage", // or "entry"
+ "corejs": 3
+ }
+ ]
+ ],
"plugins": [
[
"module-resolver",
@@ -22,8 +30,7 @@
[
"@babel/plugin-transform-runtime",
{
- "corejs": 2,
- "regenerator": true
+ "corejs": 3
}
]
]
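
Note on the preset change above: with useBuiltIns: "usage" and corejs: 3,
@babel/preset-env injects only the core-js modules each file actually needs,
rather than leaning on one monolithic polyfill import. A minimal illustrative
sketch (input and approximate output; not taken from this repo):

    // input
    const unique = [...new Set([1, 2, 2])];

    // approximate output under useBuiltIns: "usage", corejs: 3
    "use strict";
    require("core-js/modules/es.set");
    // ...plus iterator-related modules and the spread helper...
    var unique = _toConsumableArray(new Set([1, 2, 2]));
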
diff --git a/.eslintrc b/.eslintrc
index d39f2f029..051adc039 100644
--- a/.eslintrc
+++ b/.eslintrc
@@ -24,7 +24,8 @@
"error",
{ "max": 1, "maxEOF": 0, "maxBOF": 0 }
],
- "import/no-unresolved": false
+ "import/no-unresolved": 0,
+ "import/no-cycle": 0
},
"settings": {
"import/resolver": {
diff --git a/dist/mercury.js b/dist/mercury.js
index 4489a8ace..d9dcc8c03 100644
--- a/dist/mercury.js
+++ b/dist/mercury.js
@@ -1,41 +1,122 @@
'use strict';
-function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
-
-var _regeneratorRuntime = _interopDefault(require('@babel/runtime-corejs2/regenerator'));
-var _objectSpread = _interopDefault(require('@babel/runtime-corejs2/helpers/objectSpread'));
-var _objectWithoutProperties = _interopDefault(require('@babel/runtime-corejs2/helpers/objectWithoutProperties'));
-var _asyncToGenerator = _interopDefault(require('@babel/runtime-corejs2/helpers/asyncToGenerator'));
-var URL = _interopDefault(require('url'));
-var cheerio = _interopDefault(require('cheerio'));
-var TurndownService = _interopDefault(require('turndown'));
-var iconv = _interopDefault(require('iconv-lite'));
-var _parseInt = _interopDefault(require('@babel/runtime-corejs2/core-js/parse-int'));
-var _slicedToArray = _interopDefault(require('@babel/runtime-corejs2/helpers/slicedToArray'));
-var _Promise = _interopDefault(require('@babel/runtime-corejs2/core-js/promise'));
-var request = _interopDefault(require('postman-request'));
-var _Reflect$ownKeys = _interopDefault(require('@babel/runtime-corejs2/core-js/reflect/own-keys'));
-var _toConsumableArray = _interopDefault(require('@babel/runtime-corejs2/helpers/toConsumableArray'));
-var _defineProperty = _interopDefault(require('@babel/runtime-corejs2/helpers/defineProperty'));
-var _parseFloat = _interopDefault(require('@babel/runtime-corejs2/core-js/parse-float'));
-var _Set = _interopDefault(require('@babel/runtime-corejs2/core-js/set'));
-var _typeof = _interopDefault(require('@babel/runtime-corejs2/helpers/typeof'));
-var _getIterator = _interopDefault(require('@babel/runtime-corejs2/core-js/get-iterator'));
-var _Object$assign = _interopDefault(require('@babel/runtime-corejs2/core-js/object/assign'));
-var _Object$keys = _interopDefault(require('@babel/runtime-corejs2/core-js/object/keys'));
-var stringDirection = _interopDefault(require('string-direction'));
-var validUrl = _interopDefault(require('valid-url'));
-var moment = _interopDefault(require('moment-timezone'));
-var parseFormat = _interopDefault(require('moment-parseformat'));
-var wuzzy = _interopDefault(require('wuzzy'));
-var difflib = _interopDefault(require('difflib'));
-var _Array$from = _interopDefault(require('@babel/runtime-corejs2/core-js/array/from'));
-var ellipsize = _interopDefault(require('ellipsize'));
-var _Array$isArray = _interopDefault(require('@babel/runtime-corejs2/core-js/array/is-array'));
+var _Object$defineProperty = require('@babel/runtime-corejs3/core-js-stable/object/define-property');
+var _Object$defineProperties = require('@babel/runtime-corejs3/core-js-stable/object/define-properties');
+var _Object$getOwnPropertyDescriptors = require('@babel/runtime-corejs3/core-js-stable/object/get-own-property-descriptors');
+var _forEachInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/for-each');
+var _Object$getOwnPropertyDescriptor = require('@babel/runtime-corejs3/core-js-stable/object/get-own-property-descriptor');
+var _filterInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/filter');
+var _Object$getOwnPropertySymbols = require('@babel/runtime-corejs3/core-js-stable/object/get-own-property-symbols');
+var _Object$keys = require('@babel/runtime-corejs3/core-js-stable/object/keys');
+var _regeneratorRuntime = require('@babel/runtime-corejs3/regenerator');
+var _defineProperty = require('@babel/runtime-corejs3/helpers/defineProperty');
+var _mapInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/map');
+var _objectWithoutProperties = require('@babel/runtime-corejs3/helpers/objectWithoutProperties');
+require('regenerator-runtime/runtime');
+var _asyncToGenerator = require('@babel/runtime-corejs3/helpers/asyncToGenerator');
+var URL = require('url');
+var cheerio = require('cheerio');
+var TurndownService = require('turndown');
+var _includesInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/includes');
+var iconv = require('iconv-lite');
+require('core-js/modules/es.regexp.exec');
+require('core-js/modules/es.string.replace');
+var _trimInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/trim');
+var _findInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/find');
+require('core-js/modules/es.string.match');
+var _parseInt = require('@babel/runtime-corejs3/core-js-stable/parse-int');
+require('core-js/modules/es.regexp.constructor');
+require('core-js/modules/es.regexp.to-string');
+require('core-js/modules/es.string.split');
+require('core-js/modules/es.array.join');
+var _concatInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/concat');
+var _slicedToArray = require('@babel/runtime-corejs3/helpers/slicedToArray');
+var _reverseInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/reverse');
+var _reduceInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/reduce');
+var _sliceInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/slice');
+var _Promise = require('@babel/runtime-corejs3/core-js-stable/promise');
+var request = require('postman-request');
+var _Reflect$ownKeys = require('@babel/runtime-corejs3/core-js-stable/reflect/own-keys');
+var _toConsumableArray = require('@babel/runtime-corejs3/helpers/toConsumableArray');
+var _parseFloat = require('@babel/runtime-corejs3/core-js-stable/parse-float');
+var _Set = require('@babel/runtime-corejs3/core-js-stable/set');
+require('core-js/modules/es.date.to-string');
+require('core-js/modules/es.function.name');
+require('core-js/modules/es.object.to-string');
+var _getIterator = require('@babel/runtime-corejs3/core-js/get-iterator');
+var _Array$isArray = require('@babel/runtime-corejs3/core-js-stable/array/is-array');
+var _getIteratorMethod = require('@babel/runtime-corejs3/core-js/get-iterator-method');
+var _Symbol = require('@babel/runtime-corejs3/core-js-stable/symbol');
+var _Array$from = require('@babel/runtime-corejs3/core-js-stable/array/from');
+var _typeof = require('@babel/runtime-corejs3/helpers/typeof');
+var _indexOfInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/index-of');
+var _Object$assign = require('@babel/runtime-corejs3/core-js-stable/object/assign');
+var _bindInstanceProperty = require('@babel/runtime-corejs3/core-js-stable/instance/bind');
+var stringDirection = require('string-direction');
+var validUrl = require('valid-url');
+require('core-js/modules/es.date.to-iso-string');
+var moment = require('moment-timezone');
+var parseFormat = require('moment-parseformat');
+var wuzzy = require('wuzzy');
+var difflib = require('difflib');
+var ellipsize = require('ellipsize');
+
+function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
+
+var _Object$defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
+var _Object$defineProperties__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
+var _Object$getOwnPropertyDescriptors__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
+var _forEachInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_forEachInstanceProperty);
+var _Object$getOwnPropertyDescriptor__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
+var _filterInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_filterInstanceProperty);
+var _Object$getOwnPropertySymbols__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
+var _Object$keys__default = /*#__PURE__*/_interopDefaultLegacy(_Object$keys);
+var _regeneratorRuntime__default = /*#__PURE__*/_interopDefaultLegacy(_regeneratorRuntime);
+var _defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
+var _mapInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_mapInstanceProperty);
+var _objectWithoutProperties__default = /*#__PURE__*/_interopDefaultLegacy(_objectWithoutProperties);
+var _asyncToGenerator__default = /*#__PURE__*/_interopDefaultLegacy(_asyncToGenerator);
+var URL__default = /*#__PURE__*/_interopDefaultLegacy(URL);
+var cheerio__default = /*#__PURE__*/_interopDefaultLegacy(cheerio);
+var TurndownService__default = /*#__PURE__*/_interopDefaultLegacy(TurndownService);
+var _includesInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_includesInstanceProperty);
+var iconv__default = /*#__PURE__*/_interopDefaultLegacy(iconv);
+var _trimInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_trimInstanceProperty);
+var _findInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_findInstanceProperty);
+var _parseInt__default = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
+var _concatInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_concatInstanceProperty);
+var _slicedToArray__default = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
+var _reverseInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_reverseInstanceProperty);
+var _reduceInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_reduceInstanceProperty);
+var _sliceInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_sliceInstanceProperty);
+var _Promise__default = /*#__PURE__*/_interopDefaultLegacy(_Promise);
+var request__default = /*#__PURE__*/_interopDefaultLegacy(request);
+var _Reflect$ownKeys__default = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
+var _toConsumableArray__default = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
+var _parseFloat__default = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
+var _Set__default = /*#__PURE__*/_interopDefaultLegacy(_Set);
+var _getIterator__default = /*#__PURE__*/_interopDefaultLegacy(_getIterator);
+var _Array$isArray__default = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
+var _getIteratorMethod__default = /*#__PURE__*/_interopDefaultLegacy(_getIteratorMethod);
+var _Symbol__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
+var _Array$from__default = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
+var _typeof__default = /*#__PURE__*/_interopDefaultLegacy(_typeof);
+var _indexOfInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_indexOfInstanceProperty);
+var _Object$assign__default = /*#__PURE__*/_interopDefaultLegacy(_Object$assign);
+var _bindInstanceProperty__default = /*#__PURE__*/_interopDefaultLegacy(_bindInstanceProperty);
+var stringDirection__default = /*#__PURE__*/_interopDefaultLegacy(stringDirection);
+var validUrl__default = /*#__PURE__*/_interopDefaultLegacy(validUrl);
+var moment__default = /*#__PURE__*/_interopDefaultLegacy(moment);
+var parseFormat__default = /*#__PURE__*/_interopDefaultLegacy(parseFormat);
+var wuzzy__default = /*#__PURE__*/_interopDefaultLegacy(wuzzy);
+var difflib__default = /*#__PURE__*/_interopDefaultLegacy(difflib);
+var ellipsize__default = /*#__PURE__*/_interopDefaultLegacy(ellipsize);
var NORMALIZE_RE = /\s{2,}(?![^<>]*<\/(pre|code|textarea)>)/g;
function normalizeSpaces(text) {
- return text.replace(NORMALIZE_RE, ' ').trim();
+ var _context;
+
+ return _trimInstanceProperty__default['default'](_context = text.replace(NORMALIZE_RE, ' ')).call(_context);
}
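
This hunk shows the pattern that repeats throughout the rest of the bundle:
@babel/runtime-corejs3 routes instance-method calls through "pure" wrappers so
the build never patches globals. Each wrapper takes the receiver and returns
the (possibly polyfilled) method, which is then invoked via .call on that same
receiver. Roughly:

    // source
    text.trim();

    // compiled shape, as emitted above
    var _context;
    _trimInstanceProperty(_context = text).call(_context);
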
// Given a node type to search for, and a list of regular expressions,
@@ -44,7 +125,7 @@ function normalizeSpaces(text) {
// string to be cleaned.
// Only used for date_published currently.
function extractFromUrl(url, regexList) {
- var matchRe = regexList.find(function (re) {
+ var matchRe = _findInstanceProperty__default['default'](regexList).call(regexList, function (re) {
return re.test(url);
});
@@ -82,7 +163,7 @@ function pageNumFromUrl(url) {
var matches = url.match(PAGE_IN_HREF_RE);
if (!matches) return null;
- var pageNum = _parseInt(matches[6], 10); // Return pageNum < 100, otherwise
+ var pageNum = _parseInt__default['default'](matches[6], 10); // Return pageNum < 100, otherwise
// return null
@@ -120,17 +201,20 @@ function isGoodSegment(segment, index, firstSegmentHasLetters) {
function articleBaseUrl(url, parsed) {
- var parsedUrl = parsed || URL.parse(url);
+ var _context, _context2, _context3, _context4;
+
+ var parsedUrl = parsed || URL__default['default'].parse(url);
var protocol = parsedUrl.protocol,
host = parsedUrl.host,
path = parsedUrl.path;
var firstSegmentHasLetters = false;
- var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {
+
+ var cleanedSegments = _reduceInstanceProperty__default['default'](_context = _reverseInstanceProperty__default['default'](_context2 = path.split('/')).call(_context2)).call(_context, function (acc, rawSegment, index) {
var segment = rawSegment; // Split off and save anything that looks like a file type.
- if (segment.includes('.')) {
+ if (_includesInstanceProperty__default['default'](segment).call(segment, '.')) {
var _segment$split = segment.split('.'),
- _segment$split2 = _slicedToArray(_segment$split, 2),
+ _segment$split2 = _slicedToArray__default['default'](_segment$split, 2),
possibleSegment = _segment$split2[0],
fileExt = _segment$split2[1];
@@ -160,7 +244,8 @@ function articleBaseUrl(url, parsed) {
return acc;
}, []);
- return "".concat(protocol, "//").concat(host).concat(cleanedSegments.reverse().join('/'));
+
+ return _concatInstanceProperty__default['default'](_context3 = _concatInstanceProperty__default['default'](_context4 = "".concat(protocol, "//")).call(_context4, host)).call(_context3, _reverseInstanceProperty__default['default'](cleanedSegments).call(cleanedSegments).join('/'));
}
// Given a string, return True if it appears to have an ending sentence
@@ -171,8 +256,10 @@ function hasSentenceEnd(text) {
}
function excerptContent(content) {
+ var _context;
+
var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;
- return content.trim().split(/\s+/).slice(0, words).join(' ');
+ return _sliceInstanceProperty__default['default'](_context = _trimInstanceProperty__default['default'](content).call(content).split(/\s+/)).call(_context, 0, words).join(' ');
}
// used in our fetchResource function to
@@ -183,19 +270,19 @@ function getEncoding(str) {
var matches = ENCODING_RE.exec(str);
if (matches !== null) {
- var _matches = _slicedToArray(matches, 2);
+ var _matches = _slicedToArray__default['default'](matches, 2);
str = _matches[1];
}
- if (iconv.encodingExists(str)) {
+ if (iconv__default['default'].encodingExists(str)) {
encoding = str;
}
return encoding;
}
-var REQUEST_HEADERS = cheerio.browser ? {} : {
+var REQUEST_HEADERS = cheerio__default['default'].browser ? {} : {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
}; // The number of milliseconds to attempt to fetch a resource before timing out.
@@ -207,9 +294,13 @@ var BAD_CONTENT_TYPES_RE = new RegExp("^(".concat(BAD_CONTENT_TYPES.join('|'), "
var MAX_CONTENT_LENGTH = 5242880; // Turn the global proxy on or off
+function ownKeys(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys(Object(source), true)).call(_context3, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context4; _forEachInstanceProperty__default['default'](_context4 = ownKeys(Object(source))).call(_context4, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
+
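
A side note on the spread helper defined above: newer Babel emits nested
calls, e.g. _objectSpread(_objectSpread({}, a), b) rather than the old
_objectSpread({}, a, b) (visible in the headers option in the fetchResource
hunk below), so that properties are defined in the same order the native
{ ...a, ...b } syntax requires.

    // source
    var merged = { ...defaults, ...overrides };

    // compiled with the objectSpread2-style helper used in this bundle
    var merged = _objectSpread(_objectSpread({}, defaults), overrides);
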
function get(options) {
- return new _Promise(function (resolve, reject) {
- request(options, function (err, response, body) {
+ return new _Promise__default['default'](function (resolve, reject) {
+ request__default['default'](options, function (err, response, body) {
if (err) {
reject(err);
} else {
@@ -268,25 +359,23 @@ function fetchResource(_x, _x2) {
}
function _fetchResource() {
- _fetchResource = _asyncToGenerator(
- /*#__PURE__*/
- _regeneratorRuntime.mark(function _callee(url, parsedUrl) {
+ _fetchResource = _asyncToGenerator__default['default']( /*#__PURE__*/_regeneratorRuntime__default['default'].mark(function _callee(url, parsedUrl) {
var headers,
options,
- _ref2,
+ _yield$get,
response,
body,
_args = arguments;
- return _regeneratorRuntime.wrap(function _callee$(_context) {
+ return _regeneratorRuntime__default['default'].wrap(function _callee$(_context2) {
while (1) {
- switch (_context.prev = _context.next) {
+ switch (_context2.prev = _context2.next) {
case 0:
headers = _args.length > 2 && _args[2] !== undefined ? _args[2] : {};
- parsedUrl = parsedUrl || URL.parse(encodeURI(url));
+ parsedUrl = parsedUrl || URL__default['default'].parse(encodeURI(url));
options = _objectSpread({
url: parsedUrl.href,
- headers: _objectSpread({}, REQUEST_HEADERS, headers),
+ headers: _objectSpread(_objectSpread({}, REQUEST_HEADERS), headers),
timeout: FETCH_TIMEOUT,
// Accept cookies
jar: true,
@@ -301,34 +390,34 @@ function _fetchResource() {
// Follow GET redirects; this option is for Node only
followRedirect: true
});
- _context.next = 5;
+ _context2.next = 5;
return get(options);
case 5:
- _ref2 = _context.sent;
- response = _ref2.response;
- body = _ref2.body;
- _context.prev = 8;
+ _yield$get = _context2.sent;
+ response = _yield$get.response;
+ body = _yield$get.body;
+ _context2.prev = 8;
validateResponse(response);
- return _context.abrupt("return", {
+ return _context2.abrupt("return", {
body: body,
response: response
});
case 13:
- _context.prev = 13;
- _context.t0 = _context["catch"](8);
- return _context.abrupt("return", {
+ _context2.prev = 13;
+ _context2.t0 = _context2["catch"](8);
+ return _context2.abrupt("return", {
error: true,
- message: _context.t0.message
+ message: _context2.t0.message
});
case 16:
case "end":
- return _context.stop();
+ return _context2.stop();
}
}
- }, _callee, this, [[8, 13]]);
+ }, _callee, null, [[8, 13]]);
}));
return _fetchResource.apply(this, arguments);
}
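
For orientation, the regenerator state machine above corresponds roughly to
this async source (a readability sketch; some request options from the
compiled output are elided):

    async function fetchResource(url, parsedUrl, headers = {}) {
      parsedUrl = parsedUrl || URL.parse(encodeURI(url));
      const options = {
        url: parsedUrl.href,
        headers: { ...REQUEST_HEADERS, ...headers },
        timeout: FETCH_TIMEOUT,
        jar: true, // accept cookies
        followRedirect: true, // follow GET redirects; Node only
      };
      const { response, body } = await get(options);
      try {
        validateResponse(response);
        return { body, response };
      } catch (e) {
        return { error: true, message: e.message };
      }
    }
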
@@ -355,6 +444,8 @@ function normalizeMetaTags($) {
return $;
}
+var _context;
+
// Spacer images to be removed
var SPACER_RE = new RegExp('transparent|spacer|blank', 'i'); // The class we will use to mark elements we want to keep
// but would normally remove
@@ -363,9 +454,19 @@ var KEEP_CLASS = 'mercury-parser-keep';
var KEEP_SELECTORS = ['iframe[src^="https://www.youtube.com"]', 'iframe[src^="https://www.youtube-nocookie.com"]', 'iframe[src^="http://www.youtube.com"]', 'iframe[src^="https://player.vimeo"]', 'iframe[src^="http://player.vimeo"]', 'iframe[src^="https://www.redditmedia.com"]']; // A list of tags to strip from the output if we encounter them.
var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object']; // cleanAttributes
+
+var REMOVE_ATTRS = ['style', 'align'];
+var REMOVE_ATTR_SELECTORS = _mapInstanceProperty__default['default'](REMOVE_ATTRS).call(REMOVE_ATTRS, function (selector) {
+ return "[".concat(selector, "]");
+});
var WHITELIST_ATTRS = ['src', 'srcset', 'sizes', 'type', 'href', 'class', 'id', 'alt', 'xlink:href', 'width', 'height'];
var WHITELIST_ATTRS_RE = new RegExp("^(".concat(WHITELIST_ATTRS.join('|'), ")$"), 'i'); // removeEmpty
+var REMOVE_EMPTY_TAGS = ['p'];
+var REMOVE_EMPTY_SELECTORS = _mapInstanceProperty__default['default'](REMOVE_EMPTY_TAGS).call(REMOVE_EMPTY_TAGS, function (tag) {
+ return "".concat(tag, ":empty");
+}).join(','); // cleanTags
+
var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(','); // cleanHeaders
var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
@@ -433,6 +534,7 @@ var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
+var UNLIKELY_RE = new RegExp(_concatInstanceProperty__default['default'](_context = "!(".concat(candidatesWhitelist, ")|(")).call(_context, candidatesBlacklist, ")"), 'i');
function stripUnlikelyCandidates($) {
// Loop through the provided document and remove any non-link nodes
@@ -445,11 +547,14 @@ function stripUnlikelyCandidates($) {
// :param $: a cheerio object to strip nodes from
// :return $: the cleaned cheerio object
$('*').not('a').each(function (index, node) {
+ var _context;
+
var $node = $(node);
var classes = $node.attr('class');
var id = $node.attr('id');
if (!id && !classes) return;
- var classAndId = "".concat(classes || '', " ").concat(id || '');
+
+ var classAndId = _concatInstanceProperty__default['default'](_context = "".concat(classes || '', " ")).call(_context, id || '');
if (CANDIDATES_WHITELIST.test(classAndId)) {
return;
@@ -469,7 +574,7 @@ function stripUnlikelyCandidates($) {
//
// :param $: A cheerio object
-function brsToPs$$1($) {
+function brsToPs($) {
var collapsing = false;
$('br').each(function (index, element) {
var $element = $(element);
@@ -523,10 +628,10 @@ function paragraphize(node, $) {
function convertDivs($) {
$('div').each(function (index, div) {
var $div = $(div);
- var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
+ var convertible = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
- if (convertable) {
- convertNodeTo$$1($div, $, 'p');
+ if (convertible) {
+ convertNodeTo($div, $, 'p');
}
});
return $;
@@ -535,10 +640,10 @@ function convertDivs($) {
function convertSpans($) {
$('span').each(function (index, span) {
var $span = $(span);
- var convertable = $span.parents('p, div').length === 0;
+ var convertible = $span.parents('p, div, li, figcaption').length === 0;
- if (convertable) {
- convertNodeTo$$1($span, $, 'p');
+ if (convertible) {
+ convertNodeTo($span, $, 'p');
}
});
return $;
@@ -555,14 +660,16 @@ function convertSpans($) {
// (By-reference mutation, though. Returned just for convenience.)
-function convertToParagraphs$$1($) {
- $ = brsToPs$$1($);
+function convertToParagraphs($) {
+ $ = brsToPs($);
$ = convertDivs($);
$ = convertSpans($);
return $;
}
-function convertNodeTo$$1($node, $) {
+function convertNodeTo($node, $) {
+ var _context, _context3, _context4, _context5;
+
var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
var node = $node.get(0);
@@ -572,8 +679,10 @@ function convertNodeTo$$1($node, $) {
var attrs = getAttrs(node) || {};
- var attribString = _Reflect$ownKeys(attrs).map(function (key) {
- return "".concat(key, "=").concat(attrs[key]);
+ var attribString = _mapInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](attrs)).call(_context, function (key) {
+ var _context2;
+
+ return _concatInstanceProperty__default['default'](_context2 = "".concat(key, "=")).call(_context2, attrs[key]);
}).join(' ');
var html;
@@ -587,14 +696,14 @@ function convertNodeTo$$1($node, $) {
html = $node.contents();
}
- $node.replaceWith("<".concat(tag, " ").concat(attribString, ">").concat(html, "</").concat(tag, ">"));
+ $node.replaceWith(_concatInstanceProperty__default['default'](_context3 = _concatInstanceProperty__default['default'](_context4 = _concatInstanceProperty__default['default'](_context5 = "<".concat(tag, " ")).call(_context5, attribString, ">")).call(_context4, html, "</")).call(_context3, tag, ">"));
return $;
}
function cleanForHeight($img, $) {
- var height = _parseInt($img.attr('height'), 10);
+ var height = _parseInt__default['default']($img.attr('height'), 10);
- var width = _parseInt($img.attr('width'), 10) || 20; // Remove images that explicitly have very small heights or
+ var width = _parseInt__default['default']($img.attr('width'), 10) || 20; // Remove images that explicitly have very small heights or
// widths, because they are most likely shims or icons,
// which aren't very useful for reading.
@@ -621,11 +730,12 @@ function removeSpacers($img, $) {
}
function cleanImages($article, $) {
- $article.find('img').each(function (index, img) {
+ _findInstanceProperty__default['default']($article).call($article, 'img').each(function (index, img) {
var $img = $(img);
cleanForHeight($img, $);
removeSpacers($img, $);
});
+
return $;
}
@@ -637,11 +747,13 @@ function markToKeep(article, $, url) {
}
if (url) {
- var _URL$parse = URL.parse(url),
+ var _context, _context2;
+
+ var _URL$parse = URL__default['default'].parse(url),
protocol = _URL$parse.protocol,
hostname = _URL$parse.hostname;
- tags = [].concat(_toConsumableArray(tags), ["iframe[src^=\"".concat(protocol, "//").concat(hostname, "\"]")]);
+ tags = _concatInstanceProperty__default['default'](_context = []).call(_context, _toConsumableArray__default['default'](tags), [_concatInstanceProperty__default['default'](_context2 = "iframe[src^=\"".concat(protocol, "//")).call(_context2, hostname, "\"]")]);
}
$(tags.join(','), article).addClass(KEEP_CLASS);
@@ -664,7 +776,7 @@ function stripJunkTags(article, $) {
// by the title extractor instead. If there's less than 3 of them (<3),
// strip them. Otherwise, turn 'em into H2s.
-function cleanHOnes$$1(article, $) {
+function cleanHOnes(article, $) {
var $hOnes = $('h1', article);
if ($hOnes.length < 3) {
@@ -673,31 +785,38 @@ function cleanHOnes$$1(article, $) {
});
} else {
$hOnes.each(function (index, node) {
- convertNodeTo$$1($(node), $, 'h2');
+ convertNodeTo($(node), $, 'h2');
});
}
return $;
}
+function ownKeys$1(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$1(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context2; _forEachInstanceProperty__default['default'](_context2 = ownKeys$1(Object(source), true)).call(_context2, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys$1(Object(source))).call(_context3, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
+
function removeAllButWhitelist($article, $) {
- $article.find('*').each(function (index, node) {
+ _findInstanceProperty__default['default']($article).call($article, '*').each(function (index, node) {
+ var _context;
+
var attrs = getAttrs(node);
- setAttrs(node, _Reflect$ownKeys(attrs).reduce(function (acc, attr) {
+ setAttrs(node, _reduceInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](attrs)).call(_context, function (acc, attr) {
if (WHITELIST_ATTRS_RE.test(attr)) {
- return _objectSpread({}, acc, _defineProperty({}, attr, attrs[attr]));
+ return _objectSpread$1(_objectSpread$1({}, acc), {}, _defineProperty__default['default']({}, attr, attrs[attr]));
}
return acc;
}, {}));
}); // Remove the mercury-parser-keep class from result
+
$(".".concat(KEEP_CLASS), $article).removeClass(KEEP_CLASS);
return $article;
} // Remove attributes like style or align
-function cleanAttributes$$1($article, $) {
+function cleanAttributes($article, $) {
// Grabbing the parent because at this point
// $article will be wrapped in a div which will
// have a score set on it.
@@ -705,24 +824,50 @@ function cleanAttributes$$1($article, $) {
}
function removeEmpty($article, $) {
- $article.find('p').each(function (index, p) {
+ _findInstanceProperty__default['default']($article).call($article, 'p').each(function (index, p) {
+ var _context;
+
var $p = $(p);
- if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();
+ if (_findInstanceProperty__default['default']($p).call($p, 'iframe, img').length === 0 && _trimInstanceProperty__default['default'](_context = $p.text()).call(_context) === '') $p.remove();
});
+
return $;
}
+var _context$1;
+
// // CONTENT FETCHING CONSTANTS ////
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools']; // A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
+//
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+
+var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow']; // A list of tags which, if found inside, should cause a <div /> to NOT
// be considered the top candidate for a document.
-var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
-var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp("^(".concat(NON_TOP_CANDIDATE_TAGS$1.join('|'), ")$"), 'i'); // A list of selectors that specify, very clearly, either hNews or other
+var NON_TOP_CANDIDATE_TAGS = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
+var NON_TOP_CANDIDATE_TAGS_RE = new RegExp("^(".concat(NON_TOP_CANDIDATE_TAGS.join('|'), ")$"), 'i'); // A list of selectors that specify, very clearly, either hNews or other
// very content-specific style content, like Blogger templates.
// More examples here: http://microformats.org/wiki/blog-post-formats
-var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
-var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
-var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i'); // A list of strings that denote a positive scoring for this content as being
+var HNEWS_CONTENT_SELECTORS = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
+var PHOTO_HINTS = ['figure', 'photo', 'image', 'caption'];
+var PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i'); // A list of strings that denote a positive scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
@@ -732,7 +877,7 @@ var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'b
var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i'); // Readability publisher-specific guidelines
-var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i'); // A list of strings that denote a negative scoring for this content as being
+var READABILITY_ASSET = new RegExp('entry-content-asset', 'i'); // A list of strings that denote a negative scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
@@ -745,9 +890,16 @@ var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'b
'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget']; // The above list, joined into a matching regular expression
var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i'); // Match a digit. Pretty clear.
-var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
-var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
-var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression-both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+
+var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
+var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
+var UNLIKELY_RE$1 = new RegExp(_concatInstanceProperty__default['default'](_context$1 = "!(".concat(candidatesWhitelist$1, ")|(")).call(_context$1, candidatesBlacklist$1, ")"), 'i');
+var PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');
+var CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
+var BAD_TAGS = new RegExp('^(address|form)$', 'i');
function getWeight(node) {
var classes = node.attr('class');
@@ -781,7 +933,7 @@ function getWeight(node) {
// "try to keep photos if we can"
- if (PHOTO_HINTS_RE$1.test(classes)) {
+ if (PHOTO_HINTS_RE.test(classes)) {
score += 10;
} // add 25 if class matches entry-content-asset,
// a class apparently instructed for use in the
@@ -789,7 +941,7 @@ function getWeight(node) {
// https://www.readability.com/developers/guidelines
- if (READABILITY_ASSET$1.test(classes)) {
+ if (READABILITY_ASSET.test(classes)) {
score += 25;
}
}
@@ -801,7 +953,7 @@ function getWeight(node) {
// the node's score attribute
// returns null if no score set
function getScore($node) {
- return _parseFloat($node.attr('score')) || null;
+ return _parseFloat__default['default']($node.attr('score')) || null;
}
// return 1 for every comma in text
@@ -835,9 +987,13 @@ function scoreLength(textLength) {
// commas, etc. Higher is better.
-function scoreParagraph$$1(node) {
+function scoreParagraph(node) {
+ var _context;
+
var score = 1;
- var text = node.text().trim();
+
+ var text = _trimInstanceProperty__default['default'](_context = node.text()).call(_context);
+
var textLength = text.length; // If this paragraph is less than 25 characters, don't count it.
if (textLength < 25) {
@@ -853,7 +1009,7 @@ function scoreParagraph$$1(node) {
// that we strip. This negative tweaks junk setup paragraphs just below
// the cutoff threshold.
- if (text.slice(-1) === ':') {
+ if (_sliceInstanceProperty__default['default'](text).call(text, -1) === ':') {
score -= 1;
}
@@ -865,9 +1021,9 @@ function setScore($node, $, score) {
return $node;
}
-function addScore$$1($node, $, amount) {
+function addScore($node, $, amount) {
try {
- var score = getOrInitScore$$1($node, $) + amount;
+ var score = getOrInitScore($node, $) + amount;
setScore($node, $, score);
} catch (e) {// Ignoring; error occurs in scoreNode
}
@@ -875,11 +1031,11 @@ function addScore$$1($node, $, amount) {
return $node;
}
-function addToParent$$1(node, $, score) {
+function addToParent(node, $, score) {
var parent = node.parent();
if (parent) {
- addScore$$1(parent, $, score * 0.25);
+ addScore(parent, $, score * 0.25);
}
return node;
@@ -888,7 +1044,7 @@ function addToParent$$1(node, $, score) {
// if not, initializes a score based on
// the node's tag type
-function getOrInitScore$$1($node, $) {
+function getOrInitScore($node, $) {
var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;
var score = getScore($node);
@@ -896,38 +1052,38 @@ function getOrInitScore$$1($node, $) {
return score;
}
- score = scoreNode$$1($node);
+ score = scoreNode($node);
if (weightNodes) {
score += getWeight($node);
}
- addToParent$$1($node, $, score);
+ addToParent($node, $, score);
return score;
}
// just scores based on tag.
-function scoreNode$$1($node) {
+function scoreNode($node) {
var _$node$get = $node.get(0),
tagName = _$node$get.tagName; // TODO: Consider ordering by most likely.
// E.g., if divs are a more common tag on a page,
// Could save doing that regex test on every node – AP
- if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
- return scoreParagraph$$1($node);
+ if (PARAGRAPH_SCORE_TAGS.test(tagName)) {
+ return scoreParagraph($node);
}
if (tagName.toLowerCase() === 'div') {
return 5;
}
- if (CHILD_CONTENT_TAGS$1.test(tagName)) {
+ if (CHILD_CONTENT_TAGS.test(tagName)) {
return 3;
}
- if (BAD_TAGS$1.test(tagName)) {
+ if (BAD_TAGS.test(tagName)) {
return -3;
}
@@ -945,7 +1101,7 @@ function convertSpans$1($node, $) {
if (tagName === 'span') {
// convert spans to divs
- convertNodeTo$$1($node, $, 'div');
+ convertNodeTo($node, $, 'div');
}
}
}
@@ -953,7 +1109,7 @@ function convertSpans$1($node, $) {
function addScoreTo($node, $, score) {
if ($node) {
convertSpans$1($node, $);
- addScore$$1($node, $, score);
+ addScore($node, $, score);
}
}
@@ -962,15 +1118,15 @@ function scorePs($, weightNodes) {
// The raw score for this paragraph, before we add any parent/child
// scores.
var $node = $(node);
- $node = setScore($node, $, getOrInitScore$$1($node, $, weightNodes));
+ $node = setScore($node, $, getOrInitScore($node, $, weightNodes));
var $parent = $node.parent();
- var rawScore = scoreNode$$1($node);
- addScoreTo($parent, $, rawScore, weightNodes);
+ var rawScore = scoreNode($node);
+ addScoreTo($parent, $, rawScore);
if ($parent) {
// Add half of the individual content score to the
// grandparent
- addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);
+ addScoreTo($parent.parent(), $, rawScore / 2);
}
});
return $;
@@ -978,17 +1134,20 @@ function scorePs($, weightNodes) {
// content score, grandparents half
-function scoreContent$$1($) {
+function scoreContent($) {
var weightNodes = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
+
// First, look for special hNews based selectors and give them a big
// boost, if they exist
- HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {
- var _ref2 = _slicedToArray(_ref, 2),
+ _forEachInstanceProperty__default['default'](HNEWS_CONTENT_SELECTORS).call(HNEWS_CONTENT_SELECTORS, function (_ref) {
+ var _context;
+
+ var _ref2 = _slicedToArray__default['default'](_ref, 2),
parentSelector = _ref2[0],
childSelector = _ref2[1];
- $("".concat(parentSelector, " ").concat(childSelector)).each(function (index, node) {
- addScore$$1($(node).parent(parentSelector), $, 80);
+ $(_concatInstanceProperty__default['default'](_context = "".concat(parentSelector, " ")).call(_context, childSelector)).each(function (index, node) {
+ addScore($(node).parent(parentSelector), $, 80);
});
}); // Doubling this again
// Previous solution caused a bug
@@ -996,6 +1155,7 @@ function scoreContent$$1($) {
// scores. This is not ideal, and
// should be fixed.
+
scorePs($, weightNodes);
scorePs($, weightNodes);
return $;
@@ -1016,7 +1176,7 @@ function mergeSiblings($candidate, topScore, $) {
$candidate.parent().children().each(function (index, sibling) {
var $sibling = $(sibling); // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
+ if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {
return null;
}
@@ -1079,12 +1239,12 @@ function mergeSiblings($candidate, topScore, $) {
// candidate nodes we found and find the one with the highest score.
-function findTopCandidate$$1($) {
+function findTopCandidate($) {
var $candidate;
var topScore = 0;
$('[score]').each(function (index, node) {
// Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
+ if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {
return;
}
@@ -1106,8 +1266,6 @@ function findTopCandidate$$1($) {
return $candidate;
}
-// Scoring
-
function removeUnlessContent($node, $, weight) {
// Explicitly save entry-content-asset tags, which are
// noted as valuable in the Publisher guidelines. For now
@@ -1156,9 +1314,11 @@ function removeUnlessContent($node, $, weight) {
var nodeIsList = tagName === 'ol' || tagName === 'ul';
if (nodeIsList) {
+ var _context;
+
var previousNode = $node.prev();
- if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
+ if (previousNode && _sliceInstanceProperty__default['default'](_context = normalizeSpaces(previousNode.text())).call(_context, -1) === ':') {
return;
}
}
@@ -1182,15 +1342,15 @@ function removeUnlessContent($node, $, weight) {
// Return this same doc.
-function cleanTags$$1($article, $) {
+function cleanTags($article, $) {
$(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
var $node = $(node); // If marked to keep, skip it
- if ($node.hasClass(KEEP_CLASS) || $node.find(".".concat(KEEP_CLASS)).length > 0) return;
+ if ($node.hasClass(KEEP_CLASS) || _findInstanceProperty__default['default']($node).call($node, ".".concat(KEEP_CLASS)).length > 0) return;
var weight = getScore($node);
if (!weight) {
- weight = getOrInitScore$$1($node, $);
+ weight = getOrInitScore($node, $);
setScore($node, $, weight);
} // drop node if its weight is < 0
@@ -1235,12 +1395,12 @@ function cleanHeaders($article, $) {
// html to avoid later complications with multiple body tags.
-function rewriteTopLevel$$1(article, $) {
+function rewriteTopLevel(article, $) {
// I'm not using context here because
// it's problematic when converting the
// top-level/root node - AP
- $ = convertNodeTo$$1($('html'), $, 'div');
- $ = convertNodeTo$$1($('body'), $, 'div');
+ $ = convertNodeTo($('html'), $, 'div');
+ $ = convertNodeTo($('body'), $, 'div');
return $;
}
@@ -1250,7 +1410,7 @@ function absolutize($, rootUrl, attr) {
var attrs = getAttrs(node);
var url = attrs[attr];
if (!url) return;
- var absoluteUrl = URL.resolve(baseUrl || rootUrl, url);
+ var absoluteUrl = URL__default['default'].resolve(baseUrl || rootUrl, url);
setAttr(node, attr, absoluteUrl);
});
}
@@ -1266,38 +1426,45 @@ function absolutizeSet($, rootUrl, $content) {
// space characters inside the URL should be encoded (%20 or +)
var candidates = urlSet.match(/(?:\s*)(\S+(?:\s*[\d.]+[wx])?)(?:\s*,\s*)?/g);
if (!candidates) return;
- var absoluteCandidates = candidates.map(function (candidate) {
+
+ var absoluteCandidates = _mapInstanceProperty__default['default'](candidates).call(candidates, function (candidate) {
// a candidate URL cannot start or end with a comma
// descriptors are separated from the URLs by unescaped whitespace
- var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
- parts[0] = URL.resolve(rootUrl, parts[0]);
+ var parts = _trimInstanceProperty__default['default'](candidate).call(candidate).replace(/,$/, '').split(/\s+/);
+
+ parts[0] = URL__default['default'].resolve(rootUrl, parts[0]);
return parts.join(' ');
});
- var absoluteUrlSet = _toConsumableArray(new _Set(absoluteCandidates)).join(', ');
+ var absoluteUrlSet = _toConsumableArray__default['default'](new _Set__default['default'](absoluteCandidates)).join(', ');
setAttr(node, 'srcset', absoluteUrlSet);
}
});
}
-function makeLinksAbsolute$$1($content, $, url) {
- ['href', 'src'].forEach(function (attr) {
+function makeLinksAbsolute($content, $, url) {
+ var _context;
+
+ _forEachInstanceProperty__default['default'](_context = ['href', 'src']).call(_context, function (attr) {
return absolutize($, url, attr);
});
+
absolutizeSet($, url, $content);
return $content;
}
function textLength(text) {
- return text.trim().replace(/\s+/g, ' ').length;
+ return _trimInstanceProperty__default['default'](text).call(text).replace(/\s+/g, ' ').length;
} // Determines what percentage of the text
// in a node is link text
// Takes a node, returns a float
function linkDensity($node) {
var totalTextLength = textLength($node.text());
- var linkText = $node.find('a').text();
+
+ var linkText = _findInstanceProperty__default['default']($node).call($node, 'a').text();
+
var linkLength = textLength(linkText);
if (totalTextLength > 0) {
@@ -1311,36 +1478,45 @@ function linkDensity($node) {
return 0;
}
+function _createForOfIteratorHelper(o, allowArrayLike) { var it; if (typeof _Symbol__default['default'] === "undefined" || _getIteratorMethod__default['default'](o) == null) { if (_Array$isArray__default['default'](o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = _getIterator__default['default'](o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray(o, minLen) { var _context3; if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = _sliceInstanceProperty__default['default'](_context3 = Object.prototype.toString.call(o)).call(_context3, 8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return _Array$from__default['default'](o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
+
+function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
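
These three helpers replace the _iteratorNormalCompletion / _didIteratorError
/ _iteratorError bookkeeping that this diff deletes further down. The calling
pattern they produce (seen in extractFromMeta and extractFromSelectors below)
is:

    var _iterator = _createForOfIteratorHelper(items),
        _step;
    try {
      for (_iterator.s(); !(_step = _iterator.n()).done;) {
        var item = _step.value; // loop body runs here
      }
    } catch (err) {
      _iterator.e(err); // record the error for rethrow
    } finally {
      _iterator.f(); // close the iterator, then rethrow if one was recorded
    }
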
// search for, find a meta tag associated.
-function extractFromMeta$$1($, metaNames, cachedNames) {
+function extractFromMeta($, metaNames, cachedNames) {
var cleanTags = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
- var foundNames = metaNames.filter(function (name) {
- return cachedNames.indexOf(name) !== -1;
+
+ var foundNames = _filterInstanceProperty__default['default'](metaNames).call(metaNames, function (name) {
+ return _indexOfInstanceProperty__default['default'](cachedNames).call(cachedNames, name) !== -1;
}); // eslint-disable-next-line no-restricted-syntax
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+
+ var _iterator = _createForOfIteratorHelper(foundNames),
+ _step;
try {
var _loop = function _loop() {
+ var _context, _context2;
+
var name = _step.value;
var type = 'name';
var value = 'value';
- var nodes = $("meta[".concat(type, "=\"").concat(name, "\"]")); // Get the unique value of every matching node, in case there
+ var nodes = $(_concatInstanceProperty__default['default'](_context = "meta[".concat(type, "=\"")).call(_context, name, "\"]")); // Get the unique value of every matching node, in case there
// are two meta tags with the same name and value.
// Remove empty values.
- var values = nodes.map(function (index, node) {
+ var values = _filterInstanceProperty__default['default'](_context2 = _mapInstanceProperty__default['default'](nodes).call(nodes, function (index, node) {
return $(node).attr(value);
- }).toArray().filter(function (text) {
+ }).toArray()).call(_context2, function (text) {
return text !== '';
}); // If we have more than one value for the same name, we have a
// conflict and can't trust any of them. Skip this name. If we have
// zero, that means our meta tags had no values. Skip this name
// also.
+
if (values.length === 1) {
var metaValue; // Meta values that contain HTML should be stripped, as they
// weren't subject to cleaning previously.
@@ -1348,7 +1524,7 @@ function extractFromMeta$$1($, metaNames, cachedNames) {
if (cleanTags) {
metaValue = stripTags(values[0], $);
} else {
- var _values = _slicedToArray(values, 1);
+ var _values = _slicedToArray__default['default'](values, 1);
metaValue = _values[0];
}
@@ -1359,30 +1535,27 @@ function extractFromMeta$$1($, metaNames, cachedNames) {
}
};
- for (var _iterator = _getIterator(foundNames), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
var _ret = _loop();
- if (_typeof(_ret) === "object") return _ret.v;
+ if (_typeof__default['default'](_ret) === "object") return _ret.v;
} // If nothing is found, return null
} catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
+ _iterator.e(err);
} finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return != null) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
+ _iterator.f();
}
return null;
}
+function _createForOfIteratorHelper$1(o, allowArrayLike) { var it; if (typeof _Symbol__default['default'] === "undefined" || _getIteratorMethod__default['default'](o) == null) { if (_Array$isArray__default['default'](o) || (it = _unsupportedIterableToArray$1(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = _getIterator__default['default'](o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray$1(o, minLen) { var _context; if (!o) return; if (typeof o === "string") return _arrayLikeToArray$1(o, minLen); var n = _sliceInstanceProperty__default['default'](_context = Object.prototype.toString.call(o)).call(_context, 8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return _Array$from__default['default'](o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray$1(o, minLen); }
+
+function _arrayLikeToArray$1(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
+
function isGoodNode($node, maxChildren) {
// If it has a number of children, it's more likely a container
// element. Skip it.
@@ -1391,7 +1564,7 @@ function isGoodNode($node, maxChildren) {
} // If it looks to be within a comment, skip it.
- if (withinComment$$1($node)) {
+ if (withinComment($node)) {
return false;
}
@@ -1401,16 +1574,16 @@ function isGoodNode($node, maxChildren) {
// meta-information, like author, title, date published, etc.
-function extractFromSelectors$$1($, selectors) {
+function extractFromSelectors($, selectors) {
var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
+
// eslint-disable-next-line no-restricted-syntax
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ var _iterator = _createForOfIteratorHelper$1(selectors),
+ _step;
try {
- for (var _iterator = _getIterator(selectors), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
var selector = _step.value;
var nodes = $(selector); // If we didn't get exactly one of this selector, this may be
// a list of articles or comments. Skip it.
@@ -1434,18 +1607,9 @@ function extractFromSelectors$$1($, selectors) {
}
}
} catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
+ _iterator.e(err);
} finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return != null) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
+ _iterator.f();
}
return null;
@@ -1459,15 +1623,21 @@ function stripTags(text, $) {
return cleanText === '' ? text : cleanText;
}
-function withinComment$$1($node) {
+function withinComment($node) {
var parents = $node.parents().toArray();
- var commentParent = parents.find(function (parent) {
+
+ var commentParent = _findInstanceProperty__default['default'](parents).call(parents, function (parent) {
+ var _context;
+
var attrs = getAttrs(parent);
- var nodeClass = attrs.class,
+ var nodeClass = attrs["class"],
id = attrs.id;
- var classAndId = "".concat(nodeClass, " ").concat(id);
- return classAndId.includes('comment');
+
+ var classAndId = _concatInstanceProperty__default['default'](_context = "".concat(nodeClass, " ")).call(_context, id);
+
+ return _includesInstanceProperty__default['default'](classAndId).call(classAndId, 'comment');
});
+
return commentParent !== undefined;
}
@@ -1475,7 +1645,9 @@ function withinComment$$1($node) {
// param: node (a cheerio node)
// return: boolean
function nodeIsSufficient($node) {
- return $node.text().trim().length >= 100;
+ var _context;
+
+ return _trimInstanceProperty__default['default'](_context = $node.text()).call(_context).length >= 100;
}
function isWordpress($) {
@@ -1487,7 +1659,9 @@ function getAttrs(node) {
attributes = node.attributes;
if (!attribs && attributes) {
- var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
+ var _context;
+
+ var attrs = _reduceInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](attributes)).call(_context, function (acc, index) {
var attr = attributes[index];
if (!attr.name || !attr.value) return acc;
acc[attr.name] = attr.value;
@@ -1514,11 +1688,13 @@ function setAttrs(node, attrs) {
if (node.attribs) {
node.attribs = attrs;
} else if (node.attributes) {
+ var _context;
+
while (node.attributes.length > 0) {
node.removeAttribute(node.attributes[0].name);
}
- _Reflect$ownKeys(attrs).forEach(function (key) {
+ _forEachInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](attrs)).call(_context, function (key) {
node.setAttribute(key, attrs[key]);
});
}
@@ -1526,8 +1702,6 @@ function setAttrs(node, attrs) {
return node;
}
-// DOM manipulation
-
var IS_LINK = new RegExp('https?://', 'i');
var IMAGE_RE = '.(png|gif|jpe?g)';
var IS_IMAGE = new RegExp("".concat(IMAGE_RE), 'i');
@@ -1541,9 +1715,11 @@ var TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');
function convertLazyLoadedImages($) {
$('img').each(function (_, img) {
+ var _context;
+
var attrs = getAttrs(img);
- _Reflect$ownKeys(attrs).forEach(function (attr) {
+ _forEachInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](attrs)).call(_context, function (attr) {
var value = attrs[attr];
if (attr !== 'srcset' && IS_LINK.test(value) && IS_SRCSET.test(value)) {
@@ -1561,7 +1737,10 @@ function isComment(index, node) {
}
function cleanComments($) {
- $.root().find('*').contents().filter(isComment).remove();
+ var _context, _context2;
+
+ _filterInstanceProperty__default['default'](_context = _findInstanceProperty__default['default'](_context2 = $.root()).call(_context2, '*').contents()).call(_context, isComment).remove();
+
return $;
}
@@ -1579,19 +1758,17 @@ var Resource = {
// attempting to fetch it ourselves. Expects a
// string.
// :param headers: Custom headers to be included in the request
- create: function () {
- var _create = _asyncToGenerator(
- /*#__PURE__*/
- _regeneratorRuntime.mark(function _callee(url, preparedResponse, parsedUrl) {
- var headers,
- result,
- validResponse,
- _args = arguments;
- return _regeneratorRuntime.wrap(function _callee$(_context) {
+ create: function create(url, preparedResponse, parsedUrl) {
+ var _arguments = arguments,
+ _this = this;
+
+ return _asyncToGenerator__default['default']( /*#__PURE__*/_regeneratorRuntime__default['default'].mark(function _callee() {
+ var headers, result, validResponse;
+ return _regeneratorRuntime__default['default'].wrap(function _callee$(_context) {
while (1) {
switch (_context.prev = _context.next) {
case 0:
- headers = _args.length > 3 && _args[3] !== undefined ? _args[3] : {};
+ headers = _arguments.length > 3 && _arguments[3] !== undefined ? _arguments[3] : {};
if (!preparedResponse) {
_context.next = 6;
@@ -1608,7 +1785,8 @@ var Resource = {
};
result = {
body: preparedResponse,
- response: validResponse
+ response: validResponse,
+ alreadyDecoded: true
};
_context.next = 9;
break;
@@ -1630,36 +1808,33 @@ var Resource = {
return _context.abrupt("return", result);
case 12:
- return _context.abrupt("return", this.generateDoc(result));
+ return _context.abrupt("return", _this.generateDoc(result));
case 13:
case "end":
return _context.stop();
}
}
- }, _callee, this);
- }));
-
- function create(_x, _x2, _x3) {
- return _create.apply(this, arguments);
- }
-
- return create;
- }(),
+ }, _callee);
+ }))();
+ },
generateDoc: function generateDoc(_ref) {
var content = _ref.body,
- response = _ref.response;
+ response = _ref.response,
+ _ref$alreadyDecoded = _ref.alreadyDecoded,
+ alreadyDecoded = _ref$alreadyDecoded === void 0 ? false : _ref$alreadyDecoded;
var _response$headers$con = response.headers['content-type'],
contentType = _response$headers$con === void 0 ? '' : _response$headers$con; // TODO: Implement is_text function from
// https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
- if (!contentType.includes('html') && !contentType.includes('text')) {
+ if (!_includesInstanceProperty__default['default'](contentType).call(contentType, 'html') && !_includesInstanceProperty__default['default'](contentType).call(contentType, 'text')) {
throw new Error('Content does not appear to be text.');
}
var $ = this.encodeDoc({
content: content,
- contentType: contentType
+ contentType: contentType,
+ alreadyDecoded: alreadyDecoded
});
if ($.root().children().length === 0) {
@@ -1673,33 +1848,38 @@ var Resource = {
},
encodeDoc: function encodeDoc(_ref2) {
var content = _ref2.content,
- contentType = _ref2.contentType;
+ contentType = _ref2.contentType,
+ _ref2$alreadyDecoded = _ref2.alreadyDecoded,
+ alreadyDecoded = _ref2$alreadyDecoded === void 0 ? false : _ref2$alreadyDecoded;
+
+ if (alreadyDecoded) {
+ return cheerio__default['default'].load(content);
+ }
+
var encoding = getEncoding(contentType);
- var decodedContent = iconv.decode(content, encoding);
- var $ = cheerio.load(decodedContent); // after first cheerio.load, check to see if encoding matches
+ var decodedContent = iconv__default['default'].decode(content, encoding);
+ var $ = cheerio__default['default'].load(decodedContent); // after first cheerio.load, check to see if encoding matches
- var contentTypeSelector = cheerio.browser ? 'meta[http-equiv=content-type]' : 'meta[http-equiv=content-type i]';
+ var contentTypeSelector = cheerio__default['default'].browser ? 'meta[http-equiv=content-type]' : 'meta[http-equiv=content-type i]';
var metaContentType = $(contentTypeSelector).attr('content') || $('meta[charset]').attr('charset');
var properEncoding = getEncoding(metaContentType); // if encodings in the header/body dont match, use the one in the body
if (metaContentType && properEncoding !== encoding) {
- decodedContent = iconv.decode(content, properEncoding);
- $ = cheerio.load(decodedContent);
+ decodedContent = iconv__default['default'].decode(content, properEncoding);
+ $ = cheerio__default['default'].load(decodedContent);
}
return $;
}
};
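// The new alreadyDecoded flag threads create -> generateDoc -> encodeDoc: a
// caller-supplied preparedResponse is treated as an already-decoded string and
// skips iconv charset sniffing entirely. The decision in isolation (sketch;
// the name encodeDocSketch is illustrative):
function encodeDocSketch(_ref3) {
  if (_ref3.alreadyDecoded) {
    // string in: hand it straight to cheerio
    return cheerio__default['default'].load(_ref3.content);
  }
  // buffer in: pick an encoding from the content-type header, then decode
  var encoding = getEncoding(_ref3.contentType);
  return cheerio__default['default'].load(iconv__default['default'].decode(_ref3.content, encoding));
}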
-var _marked =
-/*#__PURE__*/
-_regeneratorRuntime.mark(range);
+var _marked = /*#__PURE__*/_regeneratorRuntime__default['default'].mark(range);
function range() {
var start,
end,
_args = arguments;
- return _regeneratorRuntime.wrap(function range$(_context) {
+ return _regeneratorRuntime__default['default'].wrap(function range$(_context) {
while (1) {
switch (_context.prev = _context.next) {
case 0:
@@ -1724,7 +1904,7 @@ function range() {
return _context.stop();
}
}
- }, _marked, this);
+ }, _marked);
}
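// range() above is the compiled form of a small inclusive generator, roughly
// (source-level sketch; the generator body itself is elided from this hunk):
//   function* range(start = 1, end = 1) {
//     while (start <= end) { yield start; start += 1; }
//   }
// so _Array$from__default['default'](range(1, 3)) gives [1, 2, 3].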
// extremely simple url validation as a first step
@@ -1735,14 +1915,16 @@ function validateUrl(_ref) {
}
var merge = function merge(extractor, domains) {
- return domains.reduce(function (acc, domain) {
+ return _reduceInstanceProperty__default['default'](domains).call(domains, function (acc, domain) {
acc[domain] = extractor;
return acc;
}, {});
};
function mergeSupportedDomains(extractor) {
- return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
+ var _context;
+
+ return extractor.supportedDomains ? merge(extractor, _concatInstanceProperty__default['default'](_context = [extractor.domain]).call(_context, _toConsumableArray__default['default'](extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
}
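// Illustrative result with a hypothetical extractor: every domain keys the
// same object, so hostname lookup stays a single property access.
var _ex = { domain: 'example.com', supportedDomains: ['www.example.com'] };
mergeSupportedDomains(_ex);
// => { 'example.com': _ex, 'www.example.com': _ex }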
var apiExtractors = {};
@@ -1754,7 +1936,7 @@ function addExtractor(extractor) {
};
}
- _Object$assign(apiExtractors, mergeSupportedDomains(extractor));
+ _Object$assign__default['default'](apiExtractors, mergeSupportedDomains(extractor));
return apiExtractors;
}
@@ -1864,7 +2046,8 @@ var TwitterExtractor = {
// selector grabs the whole page, then we're re-writing
// it to fit our needs before we clean it up.
'.permalink[role=main]': function permalinkRoleMain($node, $) {
- var tweets = $node.find('.tweet');
+ var tweets = _findInstanceProperty__default['default']($node).call($node, '.tweet');
+
var $tweetContainer = $('<div id="TWEETS_GO_HERE"></div>');
$tweetContainer.append(tweets);
$node.replaceWith($tweetContainer);
@@ -1955,13 +2138,13 @@ var TheAtlanticExtractor = {
var NewYorkerExtractor = {
domain: 'www.newyorker.com',
title: {
- selectors: ['h1[class^="ArticleHeader__hed"]', ['meta[name="og:title"]', 'value']]
+ selectors: ['h1[class^="content-header"]', 'h1[class^="ArticleHeader__hed"]', ['meta[name="og:title"]', 'value']]
},
author: {
- selectors: ['div[class^="ArticleContributors"] a[rel="author"]', 'article header div[class*="Byline__multipleContributors"]']
+ selectors: [['meta[name="author"]', 'value'], 'div[class^="ArticleContributors"] a[rel="author"]', 'article header div[class*="Byline__multipleContributors"]']
},
content: {
- selectors: ['main[class^="Layout__content"]'],
+ selectors: ['article.article.main-content', 'main[class^="Layout__content"]'],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: [],
@@ -1971,15 +2154,14 @@ var NewYorkerExtractor = {
clean: ['footer[class^="ArticleFooter__footer"]']
},
date_published: {
- selectors: [['meta[name="pubdate"]', 'value']],
- format: 'YYYYMMDD',
+ selectors: ['time.content-header__publish-date', ['meta[name="pubdate"]', 'value']],
timezone: 'America/New_York'
},
lead_image_url: {
selectors: [['meta[name="og:image"]', 'value']]
},
dek: {
- selectors: ['h2[class^="ArticleHeader__dek"]']
+ selectors: ['div.content-header__dek', 'h2[class^="ArticleHeader__dek"]']
},
next_page_url: null,
excerpt: null
@@ -1991,13 +2173,16 @@ var NewYorkerExtractor = {
var WiredExtractor = {
domain: 'www.wired.com',
title: {
- selectors: ['h1.post-title']
+ selectors: ['h1.content-header__hed', 'h1.post-title' // enter title selectors
+ ]
},
author: {
- selectors: ['a[rel="author"]']
+ selectors: [['meta[name="author"]', 'value'], 'a[rel="author"]' // enter author selectors
+ ]
},
content: {
- selectors: ['article.content'],
+ selectors: ['article.article.main-content', 'article.content' // enter content selectors
+ ],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: [],
@@ -2007,7 +2192,7 @@ var WiredExtractor = {
clean: ['.visually-hidden', 'figcaption img.photo']
},
date_published: {
- selectors: [['meta[itemprop="datePublished"]', 'value']]
+ selectors: ['time.content-header__publish-date', ['meta[itemprop="datePublished"]', 'value']]
},
lead_image_url: {
selectors: [['meta[name="og:image"]', 'value']]
@@ -2025,13 +2210,16 @@ var WiredExtractor = {
var MSNExtractor = {
domain: 'www.msn.com',
title: {
- selectors: ['h1']
+ selectors: ['h1' // enter title selectors
+ ]
},
author: {
- selectors: ['span.authorname-txt']
+ selectors: ['span.authorname-txt' // enter author selectors
+ ]
},
content: {
- selectors: ['div.richtext'],
+ selectors: ['div.richtext' // enter content selectors
+ ],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: [],
@@ -2059,10 +2247,12 @@ var MSNExtractor = {
var YahooExtractor = {
domain: 'www.yahoo.com',
title: {
- selectors: ['header.canvas-header']
+ selectors: ['header.canvas-header' // enter title selectors
+ ]
},
author: {
- selectors: ['span.provider-name']
+ selectors: ['span.provider-name' // enter author selectors
+ ]
},
content: {
selectors: [// enter content selectors
@@ -2095,10 +2285,12 @@ var YahooExtractor = {
var BuzzfeedExtractor = {
domain: 'www.buzzfeed.com',
title: {
- selectors: ['h1[id="post-title"]']
+ selectors: ['h1[id="post-title"]' // enter title selectors
+ ]
},
author: {
- selectors: ['a[data-action="user/username"]', 'byline__author']
+ selectors: ['a[data-action="user/username"]', 'byline__author' // enter author selectors
+ ]
},
content: {
selectors: [['.longform_custom_header_media', '#buzz_sub_buzz'], '#buzz_sub_buzz'],
@@ -2140,13 +2332,16 @@ var BuzzfeedExtractor = {
var WikiaExtractor = {
domain: 'fandom.wikia.com',
title: {
- selectors: ['h1.entry-title']
+ selectors: ['h1.entry-title' // enter title selectors
+ ]
},
author: {
- selectors: ['.author vcard', '.fn']
+ selectors: ['.author vcard', '.fn' // enter author selectors
+ ]
},
content: {
- selectors: ['.grid-content', '.entry-content'],
+ selectors: ['.grid-content', '.entry-content' // enter content selectors
+ ],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: [],
@@ -2174,10 +2369,12 @@ var WikiaExtractor = {
var LittleThingsExtractor = {
domain: 'www.littlethings.com',
title: {
- selectors: ['h1.post-title']
+ selectors: ['h1.post-title' // enter title selectors
+ ]
},
author: {
- selectors: [['meta[name="author"]', 'value']]
+ selectors: [['meta[name="author"]', 'value'] // enter author selectors
+ ]
},
content: {
selectors: [// enter content selectors
@@ -2386,13 +2583,15 @@ var MediumExtractor = {
if (ytRe.test(thumb)) {
var _thumb$match = thumb.match(ytRe),
- _thumb$match2 = _slicedToArray(_thumb$match, 2),
+ _thumb$match2 = _slicedToArray__default['default'](_thumb$match, 2),
_ = _thumb$match2[0],
youtubeId = _thumb$match2[1]; // eslint-disable-line
$node.attr('src', "https://www.youtube.com/embed/".concat(youtubeId));
- var $caption = $parent.find('figcaption');
+
+ var $caption = _findInstanceProperty__default['default']($parent).call($parent, 'figcaption');
+
$parent.empty().append([$node, $caption]);
return;
} // If we can't draw the YouTube preview, remove the figure.
@@ -2402,16 +2601,21 @@ var MediumExtractor = {
},
// rewrite figures to pull out image and caption, remove rest
figure: function figure($node) {
+ var _context;
+
// ignore if figure has an iframe
- if ($node.find('iframe').length > 0) return;
- var $img = $node.find('img').slice(-1)[0];
- var $caption = $node.find('figcaption');
+ if (_findInstanceProperty__default['default']($node).call($node, 'iframe').length > 0) return;
+
+ var $img = _sliceInstanceProperty__default['default'](_context = _findInstanceProperty__default['default']($node).call($node, 'img')).call(_context, -1)[0];
+
+ var $caption = _findInstanceProperty__default['default']($node).call($node, 'figcaption');
+
$node.empty().append([$img, $caption]);
},
// Remove any smaller images that did not get caught by the generic image
// cleaner (author photo 48px, leading sentence images 79px, etc.).
img: function img($node) {
- var width = _parseInt($node.attr('width'), 10);
+ var width = _parseInt__default['default']($node.attr('width'), 10);
if (width < 100) $node.remove();
}
@@ -2635,7 +2839,8 @@ var WwwThevergeComExtractor = {
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
- clean: ['.aside', 'img.c-dynamic-image']
+ clean: ['.aside', 'img.c-dynamic-image' // images come from noscript transform
+ ]
}
};
@@ -2673,7 +2878,9 @@ var WwwCnnComExtractor = {
// to related content but not marked as such in any way.
'.zn-body__paragraph': function znBody__paragraph($node) {
if ($node.has('a')) {
- if ($node.text().trim() === $node.find('a').text().trim()) {
+ var _context, _context2;
+
+ if (_trimInstanceProperty__default['default'](_context = $node.text()).call(_context) === _trimInstanceProperty__default['default'](_context2 = _findInstanceProperty__default['default']($node).call($node, 'a').text()).call(_context2)) {
$node.remove();
}
}
@@ -2936,26 +3143,26 @@ var WwwRecodeNetExtractor = {
var QzComExtractor = {
domain: 'qz.com',
title: {
- selectors: ['header.item-header.content-width-responsive']
+ selectors: ['article header h1']
},
author: {
selectors: [['meta[name="author"]', 'value']]
},
date_published: {
- selectors: ['.timestamp']
+ selectors: [['time[datetime]', 'datetime']]
},
lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
+ selectors: [['meta[name="og:image"]', 'value'], ['meta[property="og:image"]', 'content'], ['meta[name="twitter:image"]', 'content']]
},
content: {
- selectors: [['figure.featured-image', '.item-body'], '.item-body'],
+ selectors: ['#article-content'],
// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {},
// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
- clean: ['.article-aside', '.progressive-image-thumbnail']
+ clean: []
}
};
@@ -2970,7 +3177,8 @@ var WwwDmagazineComExtractor = {
date_published: {
selectors: [// enter selectors
'.story__info'],
- timezone: 'America/Chicago'
+ timezone: 'America/Chicago',
+ format: 'MMMM D, YYYY h:mm a'
},
dek: {
selectors: ['.story__subhead']
@@ -3095,8 +3303,11 @@ var WwwVoxComExtractor = {
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
'figure .e-image__image noscript': function figureEImage__imageNoscript($node) {
+ var _context;
+
var imgHtml = $node.html();
- $node.parents('.e-image__image').find('.c-dynamic-image').replaceWith(imgHtml);
+
+ _findInstanceProperty__default['default'](_context = $node.parents('.e-image__image')).call(_context, '.c-dynamic-image').replaceWith(imgHtml);
},
'figure .e-image__meta': 'figcaption'
},
@@ -3132,7 +3343,9 @@ var NewsNationalgeographicComExtractor = {
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
'.parsys.content': function parsysContent($node, $) {
- var $imgSrc = $node.find('.image.parbase.section').find('.picturefill').first().data('platform-src');
+ var _context;
+
+ var $imgSrc = _findInstanceProperty__default['default'](_context = _findInstanceProperty__default['default']($node).call($node, '.image.parbase.section')).call(_context, '.picturefill').first().data('platform-src');
if ($imgSrc) {
$node.prepend($("")));
@@ -3172,15 +3385,20 @@ var WwwNationalgeographicComExtractor = {
var $imageParent = $node.children().first();
if ($imageParent.hasClass('imageGroup')) {
- var $dataAttrContainer = $imageParent.find('.media--medium__container').children().first();
+ var $dataAttrContainer = _findInstanceProperty__default['default']($imageParent).call($imageParent, '.media--medium__container').children().first();
+
var imgPath1 = $dataAttrContainer.data('platform-image1-path');
var imgPath2 = $dataAttrContainer.data('platform-image2-path');
if (imgPath2 && imgPath1) {
- $node.prepend($("
\n \n \n
")));
+ var _context;
+
+ $node.prepend($(_concatInstanceProperty__default['default'](_context = "
\n \n \n
")));
}
} else {
- var $imgSrc = $node.find('.image.parbase.section').find('.picturefill').first().data('platform-src');
+ var _context2;
+
+ var $imgSrc = _findInstanceProperty__default['default'](_context2 = _findInstanceProperty__default['default']($node).call($node, '.image.parbase.section')).call(_context2, '.picturefill').first().data('platform-src');
if ($imgSrc) {
$node.prepend($("")));
@@ -3215,7 +3433,8 @@ var WwwLatimesComExtractor = {
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
'.trb_ar_la': function trb_ar_la($node) {
- var $figure = $node.find('figure');
+ var $figure = _findInstanceProperty__default['default']($node).call($node, 'figure');
+
$node.replaceWith($figure);
}
},
@@ -3338,7 +3557,7 @@ var WwwMsnbcComExtractor = {
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
'.pane-node-body': function paneNodeBody($node, $) {
- var _WwwMsnbcComExtractor = _slicedToArray(WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
+ var _WwwMsnbcComExtractor = _slicedToArray__default['default'](WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
selector = _WwwMsnbcComExtractor[0],
attr = _WwwMsnbcComExtractor[1];
@@ -3373,7 +3592,8 @@ var WwwThepoliticalinsiderComExtractor = {
]
},
lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
+ selectors: [['meta[name="og:image"]', 'value'] // enter selectors
+ ]
},
content: {
selectors: ['div#article-body'],
@@ -3927,7 +4147,8 @@ var WwwCnetComExtractor = {
// before it's consumable content? E.g., unusual lazy loaded images
transforms: {
'figure.image': function figureImage($node) {
- var $img = $node.find('img');
+ var $img = _findInstanceProperty__default['default']($node).call($node, 'img');
+
$img.attr('width', '100%');
$img.attr('height', '100%');
$img.addClass('__image-lead__');
@@ -4648,6 +4869,7 @@ var IciRadioCanadaCaExtractor = {
},
date_published: {
selectors: [['meta[name="dc.date.created"]', 'value']],
+ format: 'YYYY-MM-DD|HH[h]mm',
timezone: 'America/New_York'
},
dek: {
@@ -4860,7 +5082,8 @@ var WwwRedditComExtractor = {
content: {
selectors: [['div[data-test-id="post-content"] p'], // text post
['div[data-test-id="post-content"] a[target="_blank"]:not([data-click-id="timestamp"])', // external link
- 'div[data-test-id="post-content"] div[data-click-id="media"]'], // external link with media preview (YouTube, imgur album, etc...)
+ 'div[data-test-id="post-content"] div[data-click-id="media"]' // embedded media
+ ], // external link with media preview (YouTube, imgur album, etc...)
['div[data-test-id="post-content"] div[data-click-id="media"]'], // Embedded media (Reddit video)
['div[data-test-id="post-content"] a[target="_blank"]:not([data-click-id="timestamp"])'], // external link
'div[data-test-id="post-content"]'],
@@ -4869,7 +5092,8 @@ var WwwRedditComExtractor = {
transforms: {
'div[role="img"]': function divRoleImg($node) {
// External link image preview
- var $img = $node.find('img');
+ var $img = _findInstanceProperty__default['default']($node).call($node, 'img');
+
var bgImg = $node.css('background-image');
if ($img.length === 1 && bgImg) {
@@ -5584,7 +5808,7 @@ var WiredJpExtractor = {
'img[data-original]': function imgDataOriginal($node) {
var dataOriginal = $node.attr('data-original');
var src = $node.attr('src');
- var url = URL.resolve(src, dataOriginal);
+ var url = URL__default['default'].resolve(src, dataOriginal);
$node.attr('src', url);
}
},
@@ -5814,9 +6038,321 @@ var TimesofindiaIndiatimesComExtractor = {
}
};
+var MaTtiasBeExtractor = {
+ domain: 'ma.ttias.be',
+ title: {
+ selectors: [['meta[name="twitter:title"]', 'value']]
+ },
+ author: {
+ selectors: [['meta[name="author"]', 'value']]
+ },
+ date_published: {
+ selectors: [['meta[name="article:published_time"]', 'value']]
+ },
+ content: {
+ selectors: [['.content']],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ h2: function h2($node) {
+ // The "id" attribute values would result in low scores and the element being
+ // removed.
+ $node.attr('id', null); // h1 elements will be demoted to h2, so demote h2 elements to h3.
+
+ return 'h3';
+ },
+ h1: function h1($node) {
+ // The "id" attribute values would result in low scores and the element being
+ // removed.
+ $node.attr('id', null); // A subsequent h2 will be removed if there is not a paragraph before it, so
+ // add a paragraph here. It will be removed anyway because it is empty.
+
+ $node.after('<p></p>');
+ },
+ ul: function ul($node) {
+ // Articles contain lists of links which look like, but are not, navigation
+ // elements. Adding this class attribute avoids them being incorrectly removed.
+ $node.attr('class', 'entry-content-asset');
+ }
+ }
+ }
+};
+
+var PastebinComExtractor = {
+ domain: 'pastebin.com',
+ title: {
+ selectors: ['h1']
+ },
+ author: {
+ selectors: ['.paste_box_line2 .t_us + a']
+ },
+ date_published: {
+ selectors: ['.paste_box_line2 .t_da + span'],
+ timezone: 'America/New_York'
+ },
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
+ content: {
+ selectors: ['#selectable .text'],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ ol: 'div',
+ li: 'p'
+ },
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ }
+};
+
+/* eslint-disable no-nested-ternary */
+
+/* eslint-disable no-unused-expressions */
+var WwwAbendblattDeExtractor = {
+ domain: 'www.abendblatt.de',
+ title: {
+ selectors: ['h2.article__header__headline']
+ },
+ author: {
+ selectors: ['span.author-info__name-text']
+ },
+ date_published: {
+ selectors: [['time.article__header__date', 'datetime']]
+ },
+ dek: {
+ selectors: ["span[itemprop='description']"]
+ },
+ lead_image_url: {
+ selectors: [["meta[name='og:image']", 'value']]
+ },
+ content: {
+ selectors: ['div.article__body'],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ p: function p($node) {
+ if (!$node.hasClass('obfuscated')) return null;
+ var o = '';
+ var n = 0;
+
+ for (var i = $node.text(); n < i.length; n += 1) {
+ var r = i.charCodeAt(n);
+ r === 177 ? o += '%' : r === 178 ? o += '!' : r === 180 ? o += ';' : r === 181 ? o += '=' : r === 32 ? o += ' ' : r === 10 ? o += '\n' : r > 33 && (o += String.fromCharCode(r - 1));
+ }
+
+ $node.html(o);
+ $node.removeClass('obfuscated');
+ $node.addClass('deobfuscated');
+ return null;
+ },
+ div: function div($node) {
+ if (!$node.hasClass('obfuscated')) return null;
+ var o = '';
+ var n = 0;
+
+ for (var i = $node.text(); n < i.length; n += 1) {
+ var r = i.charCodeAt(n);
+ r === 177 ? o += '%' : r === 178 ? o += '!' : r === 180 ? o += ';' : r === 181 ? o += '=' : r === 32 ? o += ' ' : r === 10 ? o += '\n' : r > 33 && (o += String.fromCharCode(r - 1));
+ }
+
+ $node.html(o);
+ $node.removeClass('obfuscated');
+ $node.addClass('deobfuscated');
+ return null;
+ }
+ },
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ }
+};
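// Both transforms above decode the same scheme: marker codes 177/178/180/181
// map to literal characters, 32 and 10 pass through, and every other code
// above 33 shifts down by one. The same loop as a standalone sketch:
function deobfuscateSketch(input) {
  var out = '';
  for (var n = 0; n < input.length; n += 1) {
    var r = input.charCodeAt(n);
    if (r === 177) out += '%';
    else if (r === 178) out += '!';
    else if (r === 180) out += ';';
    else if (r === 181) out += '=';
    else if (r === 32) out += ' ';
    else if (r === 10) out += '\n';
    else if (r > 33) out += String.fromCharCode(r - 1); // shift-by-one
  }
  return out;
}
// deobfuscateSketch('Ifmmp') === 'Hello'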
+
+var WwwGrueneDeExtractor = {
+ domain: 'www.gruene.de',
+ title: {
+ selectors: ['header h1']
+ },
+ author: null,
+ date_published: null,
+ dek: null,
+ lead_image_url: {
+ selectors: [['meta[property="og:image"]', 'content']]
+ },
+ content: {
+ // selectors: ['section'],
+ selectors: [['section header', 'section h2', 'section p', 'section ol']],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {},
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['figcaption', 'p[class]']
+ }
+};
+
+var WwwEngadgetComExtractor = {
+ domain: 'www.engadget.com',
+ title: {
+ selectors: [['meta[name="og:title"]', 'value']]
+ },
+ author: {
+ selectors: ['a.th-meta[data-ylk*="subsec:author"]']
+ },
+ // Engadget stories have publish dates, but the only representation of them on the page
+ // is in a format like "2h ago". There are also date-related meta tags on the
+ // page, but their values are blank.
+ date_published: {
+ selectors: [// enter selectors
+ ]
+ },
+ dek: {
+ selectors: ['div[class*="o-title_mark"] div']
+ },
+ // Engadget stories do have lead images specified by an og:image meta tag, but selecting
+ // the value attribute of that tag fails. I believe the "&image;" sequence of characters
+ // is triggering this inability to select the attribute value.
+ lead_image_url: {
+ selectors: [// enter selectors
+ ]
+ },
+ content: {
+ selectors: [[// Some figures will be inside div.article-text, but some header figures/images
+ // will not.
+ '#page_body figure:not(div.article-text figure)', 'div.article-text']],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {},
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ }
+};
+var ArstechnicaComExtractor = {
+ domain: 'arstechnica.com',
+ // Articles from this site are often paginated, but I was unable to write a CSS
+ // selector to find the next page. On the last page, there will be a link with a CSS
+ // selector indicating that the previous page is next. But the parser appears to find
+ // the next page without this extractor finding it, as long as the fallback option is
+ // left at its default value of true.
+ title: {
+ selectors: ['title']
+ },
+ author: {
+ selectors: ['*[rel="author"] *[itemprop="name"]']
+ },
+ date_published: {
+ selectors: [['.byline time', 'datetime']]
+ },
+ dek: {
+ selectors: ['h2[itemprop="description"]']
+ },
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
+ content: {
+ selectors: ['div[itemprop="articleBody"]'],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ h2: function h2($node) {
+ // Some pages have an element h2 that is significant, and that the parser will
+ // remove if not following a paragraph. Adding this empty paragraph fixes it, and
+ // the empty paragraph will be removed anyway.
+ $node.before('<p></p>');
+ }
+ },
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result.
+ clean: [// Remove enlarge links and separators inside image captions.
+ 'figcaption .enlarge-link', 'figcaption .sep', // I could not transform the video into usable elements, so I
+ // removed them.
+ 'figure.video', // Image galleries that do not work.
+ '.gallery', 'aside', '.sidebar']
+ }
+};
+
+var WwwNdtvComExtractor = {
+ domain: 'www.ndtv.com',
+ title: {
+ selectors: [['meta[name="og:title"]', 'value'], 'h1.entry-title']
+ },
+ author: {
+ selectors: ['span[itemprop="author"] span[itemprop="name"]']
+ },
+ date_published: {
+ selectors: [['span[itemprop="dateModified"]', 'content']]
+ },
+ dek: {
+ selectors: ['h2']
+ },
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
+ content: {
+ selectors: ['div[itemprop="articleBody"]'],
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ // This site puts a dateline in a 'b' above the first paragraph, and then somehow
+ // blends it into the first paragraph with CSS. This transform moves the dateline
+ // to the first paragraph.
+ '.place_cont': function place_cont($node) {
+ if (!$node.parents('p').length) {
+ var nextSibling = $node.next('p');
+
+ if (nextSibling) {
+ $node.remove();
+ nextSibling.prepend($node);
+ }
+ }
+ }
+ },
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['.highlghts_Wdgt', '.ins_instory_dv_caption', 'input', '._world-wrapper .mt20']
+ }
+};
+
+var SpektrumExtractor = {
+ domain: 'www.spektrum.de',
+ title: {
+ selectors: ['.content__title']
+ },
+ author: {
+ selectors: ['.content__author__info__name']
+ },
+ date_published: {
+ selectors: ['.content__meta__date'],
+ timezone: 'Europe/Berlin'
+ },
+ dek: {
+ selectors: ['.content__intro']
+ },
+ lead_image_url: {
+ selectors: [// This is how the meta tag appears in the original source code.
+ ['meta[name="og:image"]', 'value'], // This is how the meta tag appears in the DOM in Chrome.
+ // The selector is included here to make the code work within the browser as well.
+ ['meta[property="og:image"]', 'content'], // This is the image that is shown on the page.
+ // It can be slightly cropped compared to the original in the meta tag.
+ '.image__article__top img']
+ },
+ content: {
+ selectors: ['article.content'],
+ clean: ['.breadcrumbs', '.hide-for-print', 'aside', 'header h2', '.image__article__top', '.content__author', '.copyright', '.callout-box']
+ }
+};
var CustomExtractors = /*#__PURE__*/Object.freeze({
+ __proto__: null,
BloggerExtractor: BloggerExtractor,
NYMagExtractor: NYMagExtractor,
WikipediaExtractor: WikipediaExtractor,
@@ -5952,14 +6488,29 @@ var CustomExtractors = /*#__PURE__*/Object.freeze({
BiorxivOrgExtractor: BiorxivOrgExtractor,
EpaperZeitDeExtractor: EpaperZeitDeExtractor,
WwwLadbibleComExtractor: WwwLadbibleComExtractor,
- TimesofindiaIndiatimesComExtractor: TimesofindiaIndiatimesComExtractor
+ TimesofindiaIndiatimesComExtractor: TimesofindiaIndiatimesComExtractor,
+ MaTtiasBeExtractor: MaTtiasBeExtractor,
+ PastebinComExtractor: PastebinComExtractor,
+ WwwAbendblattDeExtractor: WwwAbendblattDeExtractor,
+ WwwGrueneDeExtractor: WwwGrueneDeExtractor,
+ WwwEngadgetComExtractor: WwwEngadgetComExtractor,
+ ArstechnicaComExtractor: ArstechnicaComExtractor,
+ WwwNdtvComExtractor: WwwNdtvComExtractor,
+ SpektrumExtractor: SpektrumExtractor
});
-var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
+var _context$2;
+
+function ownKeys$2(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$2(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context2; _forEachInstanceProperty__default['default'](_context2 = ownKeys$2(Object(source), true)).call(_context2, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys$2(Object(source))).call(_context3, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
+var Extractors = _reduceInstanceProperty__default['default'](_context$2 = _Object$keys__default['default'](CustomExtractors)).call(_context$2, function (acc, key) {
var extractor = CustomExtractors[key];
- return _objectSpread({}, acc, mergeSupportedDomains(extractor));
+ return _objectSpread$2(_objectSpread$2({}, acc), mergeSupportedDomains(extractor));
}, {});
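// _objectSpread$2(_objectSpread$2({}, acc), mergeSupportedDomains(extractor))
// is the compiled, symbol-safe form of { ...acc, ...mergeSupportedDomains(extractor) },
// so Extractors ends up as one flat domain -> extractor map, e.g. (sketch):
// { 'blogspot.com': BloggerExtractor, 'nymag.com': NYMagExtractor, ... }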
+var _context$3, _context2, _context3;
+
// CLEAN AUTHOR CONSTANTS
var CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i; // CLEAN DEK CONSTANTS
@@ -5979,7 +6530,7 @@ var allMonths = months.join('|');
var timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';
var timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
var timestamp3 = '-[0-9]{3,4}$';
-var SPLIT_DATE_STRING = new RegExp("(".concat(timestamp1, ")|(").concat(timestamp2, ")|(").concat(timestamp3, ")|([0-9]{1,4})|(").concat(allMonths, ")"), 'ig'); // 2016-11-22T08:57-500
+var SPLIT_DATE_STRING = new RegExp(_concatInstanceProperty__default['default'](_context$3 = _concatInstanceProperty__default['default'](_context2 = _concatInstanceProperty__default['default'](_context3 = "(".concat(timestamp1, ")|(")).call(_context3, timestamp2, ")|(")).call(_context2, timestamp3, ")|([0-9]{1,4})|(")).call(_context$3, allMonths, ")"), 'ig'); // 2016-11-22T08:57-500
// Check if datetime string has an offset at the end
var TIME_WITH_OFFSET_RE = /-\d{3,4}$/; // CLEAN TITLE CONSTANTS
@@ -5992,13 +6543,15 @@ var DOMAIN_ENDINGS_RE = new RegExp('.com$|.net$|.org$|.co.uk$', 'g');
// just the name(s): 'David Smith'.
function cleanAuthor(author) {
- return normalizeSpaces(author.replace(CLEAN_AUTHOR_RE, '$2').trim());
+ var _context;
+
+ return normalizeSpaces(_trimInstanceProperty__default['default'](_context = author.replace(CLEAN_AUTHOR_RE, '$2')).call(_context));
}
function clean$1(leadImageUrl) {
- leadImageUrl = leadImageUrl.trim();
+ leadImageUrl = _trimInstanceProperty__default['default'](leadImageUrl).call(leadImageUrl);
- if (validUrl.isWebUri(leadImageUrl)) {
+ if (validUrl__default['default'].isWebUri(leadImageUrl)) {
return leadImageUrl;
}
@@ -6018,27 +6571,29 @@ function cleanDek(dek, _ref) {
// not a good dek - bail.
if (TEXT_LINK_RE.test(dekText)) return null;
- return normalizeSpaces(dekText.trim());
+ return normalizeSpaces(_trimInstanceProperty__default['default'](dekText).call(dekText));
}
function cleanDateString(dateString) {
- return (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1').trim();
+ var _context;
+
+ return _trimInstanceProperty__default['default'](_context = (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1')).call(_context);
}
function createDate(dateString, timezone, format) {
if (TIME_WITH_OFFSET_RE.test(dateString)) {
- return moment(new Date(dateString));
+ return moment__default['default'](new Date(dateString));
}
if (TIME_AGO_STRING.test(dateString)) {
var fragments = TIME_AGO_STRING.exec(dateString);
- return moment().subtract(fragments[1], fragments[2]);
+ return moment__default['default']().subtract(fragments[1], fragments[2]);
}
if (TIME_NOW_STRING.test(dateString)) {
- return moment();
+ return moment__default['default']();
}
- return timezone ? moment.tz(dateString, format || parseFormat(dateString), timezone) : moment(dateString, format || parseFormat(dateString));
+ return timezone ? moment__default['default'].tz(dateString, format || parseFormat__default['default'](dateString), timezone) : moment__default['default'](dateString, format || parseFormat__default['default'](dateString));
} // Take a date published string, and hopefully return a date out of
// it. Return none if we fail.
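// How a custom extractor's date_published format/timezone fields reach
// createDate (sample string hypothetical): IciRadioCanadaCa's new
// 'YYYY-MM-DD|HH[h]mm' format pins the parse instead of relying on
// parseFormat's guess:
//   createDate('2020-03-06|16h31', 'America/New_York', 'YYYY-MM-DD|HH[h]mm');
//   // -> moment.tz('2020-03-06|16h31', 'YYYY-MM-DD|HH[h]mm', 'America/New_York')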
@@ -6049,7 +6604,7 @@ function cleanDatePublished(dateString) {
// If string is in milliseconds or seconds, convert to int and return
if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {
- return new Date(_parseInt(dateString, 10)).toISOString();
+ return new Date(_parseInt__default['default'](dateString, 10)).toISOString();
}
var date = createDate(dateString, timezone, format);
@@ -6065,7 +6620,6 @@ function cleanDatePublished(dateString) {
function extractCleanNode(article, _ref) {
var $ = _ref.$,
_ref$cleanConditional = _ref.cleanConditionally,
- cleanConditionally = _ref$cleanConditional === void 0 ? true : _ref$cleanConditional,
_ref$title = _ref.title,
title = _ref$title === void 0 ? '' : _ref$title,
_ref$url = _ref.url,
@@ -6074,13 +6628,13 @@ function extractCleanNode(article, _ref) {
defaultCleaner = _ref$defaultCleaner === void 0 ? true : _ref$defaultCleaner;
// Rewrite the tag name to div if it's a top level node like body or
// html to avoid later complications with multiple body tags.
- rewriteTopLevel$$1(article, $); // Drop small images and spacer images
+ rewriteTopLevel(article, $); // Drop small images and spacer images
// Only do this is defaultCleaner is set to true;
// this can sometimes be too aggressive.
if (defaultCleaner) cleanImages(article, $); // Make links absolute
- makeLinksAbsolute$$1(article, $, url); // Mark elements to keep that would normally be removed.
+ makeLinksAbsolute(article, $, url); // Mark elements to keep that would normally be removed.
// E.g., stripJunkTags will remove iframes, so we're going to mark
// YouTube/Vimeo videos as elements we want to keep.
@@ -6091,22 +6645,24 @@ function extractCleanNode(article, _ref) {
// by the title extractor instead. If there's less than 3 of them (<3),
// strip them. Otherwise, turn 'em into H2s.
- cleanHOnes$$1(article, $); // Clean headers
+ cleanHOnes(article, $); // Clean headers
cleanHeaders(article, $, title); // We used to clean UL's and OL's here, but it was leading to
// too many in-article lists being removed. Consider a better
// way to detect menus particularly and remove them.
// Also optionally running, since it can be overly aggressive.
- if (defaultCleaner) cleanTags$$1(article, $, cleanConditionally); // Remove empty paragraph nodes
+ if (defaultCleaner) cleanTags(article, $); // Remove empty paragraph nodes
removeEmpty(article, $); // Remove unnecessary attributes
- cleanAttributes$$1(article, $);
+ cleanAttributes(article, $);
return article;
}
-function cleanTitle$$1(title, _ref) {
+function cleanTitle(title, _ref) {
+ var _context;
+
var url = _ref.url,
$ = _ref.$;
@@ -6128,7 +6684,7 @@ function cleanTitle$$1(title, _ref) {
} // strip any html tags in the title text
- return normalizeSpaces(stripTags(title, $).trim());
+ return normalizeSpaces(_trimInstanceProperty__default['default'](_context = stripTags(title, $)).call(_context));
}
function extractBreadcrumbTitle(splitTitle, text) {
@@ -6136,22 +6692,24 @@ function extractBreadcrumbTitle(splitTitle, text) {
// The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
// NYTimes - Blogs - Bits - The Best Gadgets on Earth
if (splitTitle.length >= 6) {
+ var _context;
+
// Look to see if we can find a breadcrumb splitter that happens
// more than once. If we can, we'll be able to better pull out
// the title.
- var termCounts = splitTitle.reduce(function (acc, titleText) {
+ var termCounts = _reduceInstanceProperty__default['default'](splitTitle).call(splitTitle, function (acc, titleText) {
acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
return acc;
}, {});
- var _Reflect$ownKeys$redu = _Reflect$ownKeys(termCounts).reduce(function (acc, key) {
+ var _Reflect$ownKeys$redu = _reduceInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](termCounts)).call(_context, function (acc, key) {
if (acc[1] < termCounts[key]) {
return [key, termCounts[key]];
}
return acc;
}, [0, 0]),
- _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),
+ _Reflect$ownKeys$redu2 = _slicedToArray__default['default'](_Reflect$ownKeys$redu, 2),
maxTerm = _Reflect$ownKeys$redu2[0],
termCount = _Reflect$ownKeys$redu2[1]; // We found a splitter that was used more than once, so it
// is probably the breadcrumber. Split our title on that instead.
@@ -6163,8 +6721,9 @@ function extractBreadcrumbTitle(splitTitle, text) {
splitTitle = text.split(maxTerm);
}
- var splitEnds = [splitTitle[0], splitTitle.slice(-1)];
- var longestEnd = splitEnds.reduce(function (acc, end) {
+ var splitEnds = [splitTitle[0], _sliceInstanceProperty__default['default'](splitTitle).call(splitTitle, -1)];
+
+ var longestEnd = _reduceInstanceProperty__default['default'](splitEnds).call(splitEnds, function (acc, end) {
return acc.length > end.length ? acc : end;
}, '');
@@ -6185,22 +6744,23 @@ function cleanDomainFromTitle(splitTitle, url) {
//
// Strip out the big TLDs - it just makes the matching a bit more
// accurate. Not the end of the world if it doesn't strip right.
- var _URL$parse = URL.parse(url),
+ var _URL$parse = URL__default['default'].parse(url),
host = _URL$parse.host;
var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
var startSlug = splitTitle[0].toLowerCase().replace(' ', '');
- var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
+ var startSlugRatio = wuzzy__default['default'].levenshtein(startSlug, nakedDomain);
if (startSlugRatio > 0.4 && startSlug.length > 5) {
- return splitTitle.slice(2).join('');
+ return _sliceInstanceProperty__default['default'](splitTitle).call(splitTitle, 2).join('');
}
- var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
- var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
+ var endSlug = _sliceInstanceProperty__default['default'](splitTitle).call(splitTitle, -1)[0].toLowerCase().replace(' ', '');
+
+ var endSlugRatio = wuzzy__default['default'].levenshtein(endSlug, nakedDomain);
if (endSlugRatio > 0.4 && endSlug.length >= 5) {
- return splitTitle.slice(0, -2).join('');
+ return _sliceInstanceProperty__default['default'](splitTitle).call(splitTitle, 0, -2).join('');
}
return null;
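// Note that splitTitle keeps the separator tokens (the split regex captures
// them), which is why the code drops two entries at a time. Hypothetical run:
// splitTitle = ['Slashdot', ' - ', 'News for nerds, stuff that matters'],
// host 'slashdot.org' -> nakedDomain 'slashdot'; startSlug 'slashdot' scores
// levenshtein 1 > 0.4 with length > 5, so slice(2).join('') returns
// 'News for nerds, stuff that matters'.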
@@ -6233,7 +6793,7 @@ var Cleaners = {
dek: cleanDek,
date_published: cleanDatePublished,
content: extractCleanNode,
- title: cleanTitle$$1
+ title: cleanTitle
};
// likely to be article text.
@@ -6252,12 +6812,21 @@ function extractBestNode($, opts) {
$ = stripUnlikelyCandidates($);
}
- $ = convertToParagraphs$$1($);
- $ = scoreContent$$1($, opts.weightNodes);
- var $topCandidate = findTopCandidate$$1($);
+ $ = convertToParagraphs($);
+ $ = scoreContent($, opts.weightNodes);
+ var $topCandidate = findTopCandidate($);
return $topCandidate;
}
+function _createForOfIteratorHelper$2(o, allowArrayLike) { var it; if (typeof _Symbol__default['default'] === "undefined" || _getIteratorMethod__default['default'](o) == null) { if (_Array$isArray__default['default'](o) || (it = _unsupportedIterableToArray$2(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = _getIterator__default['default'](o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray$2(o, minLen) { var _context4; if (!o) return; if (typeof o === "string") return _arrayLikeToArray$2(o, minLen); var n = _sliceInstanceProperty__default['default'](_context4 = Object.prototype.toString.call(o)).call(_context4, 8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return _Array$from__default['default'](o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray$2(o, minLen); }
+
+function _arrayLikeToArray$2(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
+
+function ownKeys$3(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$3(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context2; _forEachInstanceProperty__default['default'](_context2 = ownKeys$3(Object(source), true)).call(_context2, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys$3(Object(source))).call(_context3, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
var GenericContentExtractor = {
defaultOpts: {
stripUnlikelyCandidates: true,
@@ -6284,12 +6853,14 @@ var GenericContentExtractor = {
// cleanConditionally: Clean the node to return of some
// superfluous content. Things like forms, ads, etc.
extract: function extract(_ref, opts) {
+ var _context;
+
var $ = _ref.$,
html = _ref.html,
title = _ref.title,
url = _ref.url;
- opts = _objectSpread({}, this.defaultOpts, opts);
- $ = $ || cheerio.load(html); // Cascade through our extraction-specific opts in an ordered fashion,
+ opts = _objectSpread$3(_objectSpread$3({}, this.defaultOpts), opts);
+ $ = $ || cheerio__default['default'].load(html); // Cascade through our extraction-specific opts in an ordered fashion,
// turning them off as we try to extract content.
var node = this.getContentNode($, title, url, opts);
@@ -6301,17 +6872,16 @@ var GenericContentExtractor = {
// eslint-disable-next-line no-restricted-syntax
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ var _iterator = _createForOfIteratorHelper$2(_filterInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](opts)).call(_context, function (k) {
+ return opts[k] === true;
+ })),
+ _step;
try {
- for (var _iterator = _getIterator(_Reflect$ownKeys(opts).filter(function (k) {
- return opts[k] === true;
- })), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
var key = _step.value;
opts[key] = false;
- $ = cheerio.load(html);
+ $ = cheerio__default['default'].load(html);
node = this.getContentNode($, title, url, opts);
if (nodeIsSufficient(node)) {
@@ -6319,18 +6889,9 @@ var GenericContentExtractor = {
}
}
} catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
+ _iterator.e(err);
} finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return != null) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
+ _iterator.f();
}
return this.cleanAndReturnNode(node, $);
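// The cascade above, restated at source level (sketch): each enabled opt is
// switched off in turn and the document re-parsed until the candidate node
// passes nodeIsSufficient (>= 100 chars of trimmed text).
//   for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {
//     opts[key] = false;
//     $ = cheerio.load(html);
//     node = this.getContentNode($, title, url, opts);
//     if (nodeIsSufficient(node)) break;
//   }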
@@ -6384,27 +6945,27 @@ var GenericTitleExtractor = {
// First, check to see if we have a matching meta tag that we can make
// use of that is strongly associated with the headline.
var title;
- title = extractFromMeta$$1($, STRONG_TITLE_META_TAGS, metaCache);
- if (title) return cleanTitle$$1(title, {
+ title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);
+ if (title) return cleanTitle(title, {
url: url,
$: $
}); // Second, look through our content selectors for the most likely
// article title that is strongly associated with the headline.
- title = extractFromSelectors$$1($, STRONG_TITLE_SELECTORS);
- if (title) return cleanTitle$$1(title, {
+ title = extractFromSelectors($, STRONG_TITLE_SELECTORS);
+ if (title) return cleanTitle(title, {
url: url,
$: $
}); // Third, check for weaker meta tags that may match.
- title = extractFromMeta$$1($, WEAK_TITLE_META_TAGS, metaCache);
- if (title) return cleanTitle$$1(title, {
+ title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);
+ if (title) return cleanTitle(title, {
url: url,
$: $
}); // Last, look for weaker selector tags that may match.
- title = extractFromSelectors$$1($, WEAK_TITLE_SELECTORS);
- if (title) return cleanTitle$$1(title, {
+ title = extractFromSelectors($, WEAK_TITLE_SELECTORS);
+ if (title) return cleanTitle(title, {
url: url,
$: $
}); // If no matches, return an empty string
@@ -6434,6 +6995,11 @@ var AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .v
var bylineRe = /^[\n\s]*By/i;
var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
+function _createForOfIteratorHelper$3(o, allowArrayLike) { var it; if (typeof _Symbol__default['default'] === "undefined" || _getIteratorMethod__default['default'](o) == null) { if (_Array$isArray__default['default'](o) || (it = _unsupportedIterableToArray$3(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = _getIterator__default['default'](o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray$3(o, minLen) { var _context; if (!o) return; if (typeof o === "string") return _arrayLikeToArray$3(o, minLen); var n = _sliceInstanceProperty__default['default'](_context = Object.prototype.toString.call(o)).call(_context, 8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return _Array$from__default['default'](o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray$3(o, minLen); }
+
+function _arrayLikeToArray$3(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
var GenericAuthorExtractor = {
extract: function extract(_ref) {
var $ = _ref.$,
@@ -6441,14 +7007,14 @@ var GenericAuthorExtractor = {
var author; // First, check to see if we have a matching
// meta tag that we can make use of.
- author = extractFromMeta$$1($, AUTHOR_META_TAGS, metaCache);
+ author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
} // Second, look through our selectors looking for potential authors.
- author = extractFromSelectors$$1($, AUTHOR_SELECTORS, 2);
+ author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
@@ -6457,13 +7023,12 @@ var GenericAuthorExtractor = {
// eslint-disable-next-line no-restricted-syntax
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ var _iterator = _createForOfIteratorHelper$3(BYLINE_SELECTORS_RE),
+ _step;
try {
- for (var _iterator = _getIterator(BYLINE_SELECTORS_RE), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var _step$value = _slicedToArray(_step.value, 2),
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
+ var _step$value = _slicedToArray__default['default'](_step.value, 2),
selector = _step$value[0],
regex = _step$value[1];
@@ -6478,18 +7043,9 @@ var GenericAuthorExtractor = {
}
}
} catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
+ _iterator.e(err);
} finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return != null) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
+ _iterator.f();
}
return null;
@@ -6520,11 +7076,11 @@ var GenericDatePublishedExtractor = {
// that we can make use of.
// Don't try cleaning tags from this string
- datePublished = extractFromMeta$$1($, DATE_PUBLISHED_META_TAGS, metaCache, false);
+ datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);
if (datePublished) return cleanDatePublished(datePublished); // Second, look through our selectors looking for potential
// date_published's.
- datePublished = extractFromSelectors$$1($, DATE_PUBLISHED_SELECTORS);
+ datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
if (datePublished) return cleanDatePublished(datePublished); // Lastly, look to see if a dately string exists in the URL
datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
@@ -6556,12 +7112,14 @@ var GIF_RE = /\.gif(\?.*)?$/i;
var JPG_RE = /\.jpe?g(\?.*)?$/i;
function getSig($node) {
- return "".concat($node.attr('class') || '', " ").concat($node.attr('id') || '');
+ var _context;
+
+ return _concatInstanceProperty__default['default'](_context = "".concat($node.attr('class') || '', " ")).call(_context, $node.attr('id') || '');
} // Scores image urls based on a variety of heuristics.
function scoreImageUrl(url) {
- url = url.trim();
+ url = _trimInstanceProperty__default['default'](url).call(url);
var score = 0;
if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
@@ -6596,6 +7154,8 @@ function scoreAttr($img) {
// container elements, give a bonus if we find them
function scoreByParents($img) {
+ var _context2;
+
var score = 0;
var $figParent = $img.parents('figure').first();
@@ -6610,11 +7170,12 @@ function scoreByParents($img) {
$gParent = $parent.parent();
}
- [$parent, $gParent].forEach(function ($node) {
- if (PHOTO_HINTS_RE$1.test(getSig($node))) {
+ _forEachInstanceProperty__default['default'](_context2 = [$parent, $gParent]).call(_context2, function ($node) {
+ if (PHOTO_HINTS_RE.test(getSig($node))) {
score += 15;
}
});
+
return score;
} // Look at our immediate sibling and see if it looks like it's a
// caption. Bonus if so.
@@ -6628,7 +7189,7 @@ function scoreBySibling($img) {
score += 25;
}
- if (PHOTO_HINTS_RE$1.test(getSig($sibling))) {
+ if (PHOTO_HINTS_RE.test(getSig($sibling))) {
score += 15;
}
@@ -6637,9 +7198,9 @@ function scoreBySibling($img) {
function scoreByDimensions($img) {
var score = 0;
- var width = _parseFloat($img.attr('width'));
+ var width = _parseFloat__default['default']($img.attr('width'));
- var height = _parseFloat($img.attr('height'));
+ var height = _parseFloat__default['default']($img.attr('height'));
var src = $img.attr('src'); // Penalty for skinny images
@@ -6652,7 +7213,7 @@ function scoreByDimensions($img) {
score -= 50;
}
- if (width && height && !src.includes('sprite')) {
+ if (width && height && !_includesInstanceProperty__default['default'](src).call(src, 'sprite')) {
var area = width * height;
if (area < 5000) {
@@ -6669,6 +7230,11 @@ function scoreByPosition($imgs, index) {
return $imgs.length / 2 - index;
}
+function _createForOfIteratorHelper$4(o, allowArrayLike) { var it; if (typeof _Symbol__default['default'] === "undefined" || _getIteratorMethod__default['default'](o) == null) { if (_Array$isArray__default['default'](o) || (it = _unsupportedIterableToArray$4(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = _getIterator__default['default'](o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray$4(o, minLen) { var _context2; if (!o) return; if (typeof o === "string") return _arrayLikeToArray$4(o, minLen); var n = _sliceInstanceProperty__default['default'](_context2 = Object.prototype.toString.call(o)).call(_context2, 8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return _Array$from__default['default'](o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray$4(o, minLen); }
+
+function _arrayLikeToArray$4(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
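// Note on the fallback above: _unsupportedIterableToArray$4 is what lets the
// compiled for...of also walk values that are not real iterables in old
// engines. Strings, Arguments objects and typed arrays are copied by index
// via _arrayLikeToArray$4, Map/Set go through _Array$from, and with
// allowArrayLike set, any plain { length: n } object is accepted; anything
// else hits the TypeError above. Rough usage sketch:
//
// var it = _createForOfIteratorHelper$4([10, 20]);
// it.s();
// it.n(); // { done: false, value: 10 }
// it.n(); // { done: false, value: 20 }
// it.n(); // { done: true }
// it.f();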
// it. Like content and next page extraction, uses a scoring system
// to determine what the most likely image may be. Short circuits
// on really probable things like og:image meta tags.
@@ -6679,6 +7245,8 @@ function scoreByPosition($imgs, index) {
var GenericLeadImageUrlExtractor = {
extract: function extract(_ref) {
+ var _context;
+
var $ = _ref.$,
content = _ref.content,
metaCache = _ref.metaCache,
@@ -6693,7 +7261,7 @@ var GenericLeadImageUrlExtractor = {
// images usually have for things like Open Graph.
- var imageUrl = extractFromMeta$$1($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);
+ var imageUrl = extractFromMeta($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);
if (imageUrl) {
cleanUrl = clean$1(imageUrl);
@@ -6706,7 +7274,8 @@ var GenericLeadImageUrlExtractor = {
var $content = $(content);
var imgs = $('img', $content).toArray();
var imgScores = {};
- imgs.forEach(function (img, index) {
+
+ _forEachInstanceProperty__default['default'](imgs).call(imgs, function (img, index) {
var $img = $(img);
var src = $img.attr('src');
if (!src) return;
@@ -6719,10 +7288,10 @@ var GenericLeadImageUrlExtractor = {
imgScores[src] = score;
});
- var _Reflect$ownKeys$redu = _Reflect$ownKeys(imgScores).reduce(function (acc, key) {
+ var _Reflect$ownKeys$redu = _reduceInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](imgScores)).call(_context, function (acc, key) {
return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
}, [null, 0]),
- _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),
+ _Reflect$ownKeys$redu2 = _slicedToArray__default['default'](_Reflect$ownKeys$redu, 2),
topUrl = _Reflect$ownKeys$redu2[0],
topScore = _Reflect$ownKeys$redu2[1];
@@ -6734,12 +7303,11 @@ var GenericLeadImageUrlExtractor = {
// eslint-disable-next-line no-restricted-syntax
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ var _iterator = _createForOfIteratorHelper$4(LEAD_IMAGE_URL_SELECTORS),
+ _step;
try {
- for (var _iterator = _getIterator(LEAD_IMAGE_URL_SELECTORS), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
var selector = _step.value;
var $node = $(selector).first();
var src = $node.attr('src');
@@ -6764,18 +7332,9 @@ var GenericLeadImageUrlExtractor = {
}
}
} catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
+ _iterator.e(err);
} finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return != null) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
+ _iterator.f();
}
return null;
@@ -6789,7 +7348,7 @@ function scoreSimilarity(score, articleUrl, href) {
// sliding scale, subtract points from this link based on
// similarity.
if (score > 0) {
- var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio(); // Subtract .1 from diff_percent when calculating modifier,
+ var similarity = new difflib__default['default'].SequenceMatcher(null, articleUrl, href).ratio(); // Subtract .1 from diff_percent when calculating modifier,
// which means that if it's less than 10% different, we give a
// bonus instead. Ex:
// 3% different = +17.5 points
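// Worked numbers for the comment above, assuming the modifier is
// -(250 * (diffPercent - 0.1)), which reproduces the +17.5 example:
//   3% different -> -(250 * (0.03 - 0.1)) = +17.5 (bonus for near-identical URLs)
//  10% different -> -(250 * (0.10 - 0.1)) = 0
//  30% different -> -(250 * (0.30 - 0.1)) = -50 (penalty grows with distance)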
@@ -6811,8 +7370,8 @@ function scoreLinkText(linkText, pageNum) {
// get scored, and sorted properly by score.
var score = 0;
- if (IS_DIGIT_RE.test(linkText.trim())) {
- var linkTextAsNum = _parseInt(linkText, 10); // If it's the first page, we already got it on the first call.
+ if (IS_DIGIT_RE.test(_trimInstanceProperty__default['default'](linkText).call(linkText))) {
+ var linkTextAsNum = _parseInt__default['default'](linkText, 10); // If it's the first page, we already got it on the first call.
// Give it a negative score. Otherwise, up to page 10, give a
// small bonus.
@@ -6845,25 +7404,25 @@ function scorePageInLink(pageNum, isWp) {
return 0;
}
-var DIGIT_RE$2 = /\d/; // A list of words that, if found in link text or URLs, likely mean that
+var DIGIT_RE = /\d/; // A list of words that, if found in link text or URLs, likely mean that
// this link is not a next page link.
-var EXTRANEOUS_LINK_HINTS$1 = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];
-var EXTRANEOUS_LINK_HINTS_RE$1 = new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'), 'i'); // Match any link text/classname/id that looks like it could mean the next
+var EXTRANEOUS_LINK_HINTS = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];
+var EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i'); // Match any link text/classname/id that looks like it could mean the next
// page. Things like: next, continue, >, >>, » but not >|, »| as those can
// mean last page.
-var NEXT_LINK_TEXT_RE$1 = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i'); // Match any link text/classname/id that looks like it is an end link: things
+var NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i'); // Match any link text/classname/id that looks like it is an end link: things
// like "first", "last", "end", etc.
-var CAP_LINK_TEXT_RE$1 = new RegExp('(first|last|end)', 'i'); // Match any link text/classname/id that looks like it means the previous
+var CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i'); // Match any link text/classname/id that looks like it means the previous
// page.
-var PREV_LINK_TEXT_RE$1 = new RegExp('(prev|earl|old|new|<|«)', 'i'); // Match any phrase that looks like it could be page, or paging, or pagination
+var PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i'); // Match any phrase that looks like it could be page, or paging, or pagination
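// Behavior of the link-text patterns above on sample strings:
//
// NEXT_LINK_TEXT_RE.test('next page') // true
// NEXT_LINK_TEXT_RE.test('»')         // true ('»' at end of text)
// NEXT_LINK_TEXT_RE.test('»|')        // false ('»|' conventionally means last page)
// CAP_LINK_TEXT_RE.test('last')       // true
// PREV_LINK_TEXT_RE.test('« earlier') // true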
function scoreExtraneousLinks(href) {
// If the URL itself contains extraneous values, give a penalty.
- if (EXTRANEOUS_LINK_HINTS_RE$1.test(href)) {
+ if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {
return -25;
}
@@ -6871,10 +7430,14 @@ function scoreExtraneousLinks(href) {
}
function makeSig($link) {
- return "".concat($link.attr('class') || '', " ").concat($link.attr('id') || '');
+ var _context;
+
+ return _concatInstanceProperty__default['default'](_context = "".concat($link.attr('class') || '', " ")).call(_context, $link.attr('id') || '');
}
function scoreByParents$1($link) {
+ var _context2;
+
// If a parent node contains paging-like classname or id, give a
// bonus. Additionally, if a parent_node contains bad content
// (like 'sponsor'), give a penalty.
@@ -6883,12 +7446,12 @@ function scoreByParents$1($link) {
var negativeMatch = false;
var score = 0;
- _Array$from(range(0, 4)).forEach(function () {
+ _forEachInstanceProperty__default['default'](_context2 = _Array$from__default['default'](range(0, 4))).call(_context2, function () {
if ($parent.length === 0) {
return;
}
- var parentData = makeSig($parent, ' '); // If we have 'page' or 'paging' in our data, that's a good
+ var parentData = makeSig($parent); // If we have 'page' or 'paging' in our data, that's a good
// sign. Add a bonus.
if (!positiveMatch && PAGE_RE.test(parentData)) {
@@ -6899,7 +7462,7 @@ function scoreByParents$1($link) {
// a bad sign. Give a penalty.
- if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData) && EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)) {
+ if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData) && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {
if (!POSITIVE_SCORE_RE.test(parentData)) {
negativeMatch = true;
score -= 25;
@@ -6915,7 +7478,7 @@ function scoreByParents$1($link) {
function scorePrevLink(linkData) {
// If the link has something like "previous", it's definitely
// an old link, skip it.
- if (PREV_LINK_TEXT_RE$1.test(linkData)) {
+ if (PREV_LINK_TEXT_RE.test(linkData)) {
return -200;
}
@@ -6924,7 +7487,7 @@ function scorePrevLink(linkData) {
function shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls) {
// skip if we've already fetched this url
- if (previousUrls.find(function (url) {
+ if (_findInstanceProperty__default['default'](previousUrls).call(previousUrls, function (url) {
return href === url;
}) !== undefined) {
return false;
@@ -6938,7 +7501,7 @@ function shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrl
var hostname = parsedUrl.hostname;
- var _URL$parse = URL.parse(href),
+ var _URL$parse = URL__default['default'].parse(href),
linkHost = _URL$parse.hostname; // Domain mismatch.
@@ -6950,13 +7513,13 @@ function shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrl
var fragment = href.replace(baseUrl, '');
- if (!DIGIT_RE$2.test(fragment)) {
+ if (!DIGIT_RE.test(fragment)) {
return false;
} // This link has extraneous content (like "comment") in its link
// text, so we skip it.
- if (EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)) {
+ if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {
return false;
} // Next page link text is never long, skip if it is too long.
@@ -6982,7 +7545,7 @@ function scoreBaseUrl(href, baseRegex) {
function scoreNextLinkText(linkData) {
// Things like "next", ">>", etc.
- if (NEXT_LINK_TEXT_RE$1.test(linkData)) {
+ if (NEXT_LINK_TEXT_RE.test(linkData)) {
return 50;
}
@@ -6991,12 +7554,12 @@ function scoreNextLinkText(linkData) {
function scoreCapLinks(linkData) {
// Cap links are links like "last", etc.
- if (CAP_LINK_TEXT_RE$1.test(linkData)) {
+ if (CAP_LINK_TEXT_RE.test(linkData)) {
// If we found a link like "last", but we've already seen that
// this link is also "next", it's fine. If it's not been
// previously marked as "next", then it's probably bad.
// Penalize.
- if (NEXT_LINK_TEXT_RE$1.test(linkData)) {
+ if (NEXT_LINK_TEXT_RE.test(linkData)) {
return -65;
}
}
@@ -7009,7 +7572,9 @@ function makeBaseRegex(baseUrl) {
}
function makeSig$1($link, linkText) {
- return "".concat(linkText || $link.text(), " ").concat($link.attr('class') || '', " ").concat($link.attr('id') || '');
+ var _context, _context2;
+
+ return _concatInstanceProperty__default['default'](_context = _concatInstanceProperty__default['default'](_context2 = "".concat(linkText || $link.text(), " ")).call(_context2, $link.attr('class') || '', " ")).call(_context, $link.attr('id') || '');
}
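// The nested _concatInstanceProperty chain above is just the pure-mode
// compilation of the removed template literal; both build the same
// "text class id" signature string:
//
// "".concat(linkText || $link.text(), " ")
//   .concat($link.attr('class') || '', " ")
//   .concat($link.attr('id') || '');
// // e.g. text 'Next', class 'pagination-next', no id -> 'Next pagination-next '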
function scoreLinks(_ref) {
@@ -7020,7 +7585,7 @@ function scoreLinks(_ref) {
$ = _ref.$,
_ref$previousUrls = _ref.previousUrls,
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
- parsedUrl = parsedUrl || URL.parse(articleUrl);
+ parsedUrl = parsedUrl || URL__default['default'].parse(articleUrl);
var baseRegex = makeBaseRegex(baseUrl);
var isWp = isWordpress($); // Loop through all links, looking for hints that they may be next-page
// links. Things like having "page" in their textContent, className or
@@ -7030,7 +7595,7 @@ function scoreLinks(_ref) {
// looks most like the next page link, as long as its score is strong
// enough to have decent confidence.
- var scoredPages = links.reduce(function (possiblePages, link) {
+ var scoredPages = _reduceInstanceProperty__default['default'](links).call(links, function (possiblePages, link) {
// Remove any anchor data since we don't do a good job
// standardizing URLs (it's hard), we're going to do
// some checking with and without a trailing slash
@@ -7053,7 +7618,9 @@ function scoreLinks(_ref) {
href: href
};
} else {
- possiblePages[href].linkText = "".concat(possiblePages[href].linkText, "|").concat(linkText);
+ var _context3;
+
+ possiblePages[href].linkText = _concatInstanceProperty__default['default'](_context3 = "".concat(possiblePages[href].linkText, "|")).call(_context3, linkText);
}
var possiblePage = possiblePages[href];
@@ -7071,19 +7638,22 @@ function scoreLinks(_ref) {
possiblePage.score = score;
return possiblePages;
}, {});
- return _Reflect$ownKeys(scoredPages).length === 0 ? null : scoredPages;
+
+ return _Reflect$ownKeys__default['default'](scoredPages).length === 0 ? null : scoredPages;
}
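// Shape of the accumulator returned above: one entry per candidate href, with
// link texts of duplicate hrefs joined by '|' (values illustrative):
//
// {
//   'http://example.com/story/2': {
//     score: 70,
//     linkText: '2|next',
//     href: 'http://example.com/story/2'
//   }
// }
//
// and null when no link survived shouldScore, so callers can bail early.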
// for multi-page articles
var GenericNextPageUrlExtractor = {
extract: function extract(_ref) {
+ var _context;
+
var $ = _ref.$,
url = _ref.url,
parsedUrl = _ref.parsedUrl,
_ref$previousUrls = _ref.previousUrls,
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
- parsedUrl = parsedUrl || URL.parse(url);
+ parsedUrl = parsedUrl || URL__default['default'].parse(url);
var articleUrl = removeAnchor(url);
var baseUrl = articleBaseUrl(url, parsedUrl);
var links = $('a[href]').toArray();
@@ -7099,7 +7669,7 @@ var GenericNextPageUrlExtractor = {
if (!scoredLinks) return null; // now that we've scored all possible pages,
// find the biggest one.
- var topPage = _Reflect$ownKeys(scoredLinks).reduce(function (acc, link) {
+ var topPage = _reduceInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](scoredLinks)).call(_context, function (acc, link) {
var scoredLink = scoredLinks[link];
return scoredLink.score > acc.score ? scoredLink : acc;
}, {
@@ -7119,7 +7689,7 @@ var GenericNextPageUrlExtractor = {
var CANONICAL_META_SELECTORS = ['og:url'];
function parseDomain(url) {
- var parsedUrl = URL.parse(url);
+ var parsedUrl = URL__default['default'].parse(url);
var hostname = parsedUrl.hostname;
return hostname;
}
@@ -7146,7 +7716,7 @@ var GenericUrlExtractor = {
}
}
- var metaUrl = extractFromMeta$$1($, CANONICAL_META_SELECTORS, metaCache);
+ var metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);
if (metaUrl) {
return result(metaUrl);
@@ -7159,9 +7729,11 @@ var GenericUrlExtractor = {
var EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];
function clean$2(content, $) {
+ var _context;
+
var maxLength = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 200;
- content = content.replace(/[\s\n]+/g, ' ').trim();
- return ellipsize(content, maxLength, {
+ content = _trimInstanceProperty__default['default'](_context = content.replace(/[\s\n]+/g, ' ')).call(_context);
+ return ellipsize__default['default'](content, maxLength, {
ellipse: '…'
});
}
@@ -7170,7 +7742,7 @@ var GenericExcerptExtractor = {
var $ = _ref.$,
content = _ref.content,
metaCache = _ref.metaCache;
- var excerpt = extractFromMeta$$1($, EXCERPT_META_SELECTORS, metaCache);
+ var excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);
if (excerpt) {
return clean$2(stripTags(excerpt, $));
@@ -7178,7 +7750,9 @@ var GenericExcerptExtractor = {
var maxLength = 200;
- var shortContent = content.slice(0, maxLength * 5);
+
+ var shortContent = _sliceInstanceProperty__default['default'](content).call(content, 0, maxLength * 5);
+
return clean$2($(shortContent).text(), $, maxLength);
}
};
@@ -7186,20 +7760,25 @@ var GenericExcerptExtractor = {
var GenericWordCountExtractor = {
extract: function extract(_ref) {
var content = _ref.content;
- var $ = cheerio.load(content);
+ var $ = cheerio__default['default'].load(content);
var $content = $('div').first();
var text = normalizeSpaces($content.text());
return text.split(/\s/).length;
}
};
+var _context$4;
+
+function ownKeys$4(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$4(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context2; _forEachInstanceProperty__default['default'](_context2 = ownKeys$4(Object(source), true)).call(_context2, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys$4(Object(source))).call(_context3, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
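// ownKeys$4 / _objectSpread$4 are Babel's spec-compliant object-spread
// helpers: they copy string keys plus enumerable symbols and define
// properties rather than assigning them, so getters/setters on the target
// are never triggered. The call shape used below compiles from a plain
// spread:
//
// _objectSpread$4(_objectSpread$4({}, options), {}, { title: title })
// // ~ { ...options, title: title }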
var GenericExtractor = {
// This extractor is the default for all domains
domain: '*',
title: GenericTitleExtractor.extract,
date_published: GenericDatePublishedExtractor.extract,
author: GenericAuthorExtractor.extract,
- content: GenericContentExtractor.extract.bind(GenericContentExtractor),
+ content: _bindInstanceProperty__default['default'](_context$4 = GenericContentExtractor.extract).call(_context$4, GenericContentExtractor),
lead_image_url: GenericLeadImageUrlExtractor.extract,
dek: GenericDekExtractor.extract,
next_page_url: GenericNextPageUrlExtractor.extract,
@@ -7208,34 +7787,34 @@ var GenericExtractor = {
word_count: GenericWordCountExtractor.extract,
direction: function direction(_ref) {
var title = _ref.title;
- return stringDirection.getDirection(title);
+ return stringDirection__default['default'].getDirection(title);
},
extract: function extract(options) {
var html = options.html,
$ = options.$;
if (html && !$) {
- var loaded = cheerio.load(html);
+ var loaded = cheerio__default['default'].load(html);
options.$ = loaded;
}
var title = this.title(options);
var date_published = this.date_published(options);
var author = this.author(options);
- var content = this.content(_objectSpread({}, options, {
+ var content = this.content(_objectSpread$4(_objectSpread$4({}, options), {}, {
title: title
}));
- var lead_image_url = this.lead_image_url(_objectSpread({}, options, {
+ var lead_image_url = this.lead_image_url(_objectSpread$4(_objectSpread$4({}, options), {}, {
content: content
}));
- var dek = this.dek(_objectSpread({}, options, {
+ var dek = this.dek(_objectSpread$4(_objectSpread$4({}, options), {}, {
content: content
}));
var next_page_url = this.next_page_url(options);
- var excerpt = this.excerpt(_objectSpread({}, options, {
+ var excerpt = this.excerpt(_objectSpread$4(_objectSpread$4({}, options), {}, {
content: content
}));
- var word_count = this.word_count(_objectSpread({}, options, {
+ var word_count = this.word_count(_objectSpread$4(_objectSpread$4({}, options), {}, {
content: content
}));
var direction = this.direction({
@@ -7268,7 +7847,9 @@ var Detectors = {
'meta[name="generator"][value="blogger"]': BloggerExtractor
};
function detectByHtml($) {
- var selector = _Reflect$ownKeys(Detectors).find(function (s) {
+ var _context;
+
+ var selector = _findInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](Detectors)).call(_context, function (s) {
return $(s).length > 0;
});
@@ -7276,13 +7857,21 @@ function detectByHtml($) {
}
function getExtractor(url, parsedUrl, $) {
- parsedUrl = parsedUrl || URL.parse(url);
+ var _context;
+
+ parsedUrl = parsedUrl || URL__default['default'].parse(url);
var _parsedUrl = parsedUrl,
hostname = _parsedUrl.hostname;
- var baseDomain = hostname.split('.').slice(-2).join('.');
+
+ var baseDomain = _sliceInstanceProperty__default['default'](_context = hostname.split('.')).call(_context, -2).join('.');
+
return apiExtractors[hostname] || apiExtractors[baseDomain] || Extractors[hostname] || Extractors[baseDomain] || detectByHtml($) || GenericExtractor;
}
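// Resolution order above, with an illustrative URL:
//
// getExtractor('https://www.nytimes.com/2019/01/01/example.html')
// // hostname   = 'www.nytimes.com'
// // baseDomain = 'nytimes.com' (last two dot-separated segments)
//
// API-registered extractors (addExtractor) win over the bundled custom
// Extractors, then HTML fingerprinting via detectByHtml($), and finally
// GenericExtractor (domain '*') catches everything else.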
+function ownKeys$5(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$5(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context8; _forEachInstanceProperty__default['default'](_context8 = ownKeys$5(Object(source), true)).call(_context8, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context9; _forEachInstanceProperty__default['default'](_context9 = ownKeys$5(Object(source))).call(_context9, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
+
function cleanBySelectors($content, $, _ref) {
var clean = _ref.clean;
if (!clean) return $content;
@@ -7291,16 +7880,18 @@ function cleanBySelectors($content, $, _ref) {
} // Transform matching elements
function transformElements($content, $, _ref2) {
+ var _context;
+
var transforms = _ref2.transforms;
if (!transforms) return $content;
- _Reflect$ownKeys(transforms).forEach(function (key) {
+ _forEachInstanceProperty__default['default'](_context = _Reflect$ownKeys__default['default'](transforms)).call(_context, function (key) {
var $matches = $(key, $content);
var value = transforms[key]; // If value is a string, convert directly
if (typeof value === 'string') {
$matches.each(function (index, node) {
- convertNodeTo$$1($(node), $, transforms[key]);
+ convertNodeTo($(node), $, transforms[key]);
});
} else if (typeof value === 'function') {
// If value is function, apply function to node
@@ -7308,7 +7899,7 @@ function transformElements($content, $, _ref2) {
var result = value($(node), $); // If function returns a string, convert node to that value
if (typeof result === 'string') {
- convertNodeTo$$1($(node), $, result);
+ convertNodeTo($(node), $, result);
}
});
}
@@ -7318,22 +7909,26 @@ function transformElements($content, $, _ref2) {
}
function findMatchingSelector($, selectors, extractHtml, allowMultiple) {
- return selectors.find(function (selector) {
- if (_Array$isArray(selector)) {
+ return _findInstanceProperty__default['default'](selectors).call(selectors, function (selector) {
+ var _context3;
+
+ if (_Array$isArray__default['default'](selector)) {
+ var _context2;
+
if (extractHtml) {
- return selector.reduce(function (acc, s) {
+ return _reduceInstanceProperty__default['default'](selector).call(selector, function (acc, s) {
return acc && $(s).length > 0;
}, true);
}
- var _selector = _slicedToArray(selector, 2),
+ var _selector = _slicedToArray__default['default'](selector, 2),
s = _selector[0],
attr = _selector[1];
- return (allowMultiple || !allowMultiple && $(s).length === 1) && $(s).attr(attr) && $(s).attr(attr).trim() !== '';
+ return (allowMultiple || !allowMultiple && $(s).length === 1) && $(s).attr(attr) && _trimInstanceProperty__default['default'](_context2 = $(s).attr(attr)).call(_context2) !== '';
}
- return (allowMultiple || !allowMultiple && $(selector).length === 1) && $(selector).text().trim() !== '';
+ return (allowMultiple || !allowMultiple && $(selector).length === 1) && _trimInstanceProperty__default['default'](_context3 = $(selector).text()).call(_context3) !== '';
});
}
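// Selector conventions accepted above and consumed later in select():
//
// 'h1.title'                // plain selector: must match exactly one node
//                           // (unless allowMultiple) with non-empty text
// ['img', 'src']            // [selector, attr]: the attribute value is used
// ['img', 'src', transform] // optional transform applied to the attr value
// ['.part1', '.part2']      // with extractHtml: every selector must match,
//                           // and their matches are merged into one result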
@@ -7356,7 +7951,7 @@ function select(opts) {
if (!matchingSelector) return null;
function transformAndClean($node) {
- makeLinksAbsolute$$1($node, $, opts.url || '');
+ makeLinksAbsolute($node, $, opts.url || '');
cleanBySelectors($node, $, extractionOpts);
transformElements($node, $, extractionOpts);
return $node;
@@ -7370,7 +7965,7 @@ function select(opts) {
// selectors to include in the result. Note that all selectors in the
// array must match in order for this selector to trigger
- if (_Array$isArray(matchingSelector)) {
+ if (_Array$isArray__default['default'](matchingSelector)) {
$content = $(matchingSelector.join(','));
var $wrapper = $('');
$content.each(function (_, element) {
@@ -7387,13 +7982,15 @@ function select(opts) {
$content = transformAndClean($content);
if (Cleaners[type]) {
- Cleaners[type]($content, _objectSpread({}, opts, {
+ Cleaners[type]($content, _objectSpread$5(_objectSpread$5({}, opts), {}, {
defaultCleaner: defaultCleaner
}));
}
if (allowMultiple) {
- return $content.children().toArray().map(function (el) {
+ var _context4;
+
+ return _mapInstanceProperty__default['default'](_context4 = $content.children().toArray()).call(_context4, function (el) {
return $.html($(el));
});
}
@@ -7402,48 +7999,55 @@ function select(opts) {
}
if (extractHtml) {
- return selectHtml(matchingSelector);
+ return selectHtml();
}
var $match;
var result; // if selector is an array (e.g., ['img', 'src']),
// extract the attr
- if (_Array$isArray(matchingSelector)) {
- var _matchingSelector = _slicedToArray(matchingSelector, 3),
+ if (_Array$isArray__default['default'](matchingSelector)) {
+ var _matchingSelector = _slicedToArray__default['default'](matchingSelector, 3),
selector = _matchingSelector[0],
attr = _matchingSelector[1],
transform = _matchingSelector[2];
$match = $(selector);
$match = transformAndClean($match);
- result = $match.map(function (_, el) {
- var item = $(el).attr(attr).trim();
+ result = _mapInstanceProperty__default['default']($match).call($match, function (_, el) {
+ var _context5;
+
+ var item = _trimInstanceProperty__default['default'](_context5 = $(el).attr(attr)).call(_context5);
+
return transform ? transform(item) : item;
});
} else {
$match = $(matchingSelector);
$match = transformAndClean($match);
- result = $match.map(function (_, el) {
- return $(el).text().trim();
+ result = _mapInstanceProperty__default['default']($match).call($match, function (_, el) {
+ var _context6;
+
+ return _trimInstanceProperty__default['default'](_context6 = $(el).text()).call(_context6);
});
}
- result = _Array$isArray(result.toArray()) && allowMultiple ? result.toArray() : result[0]; // Allow custom extractor to skip default cleaner
+ result = _Array$isArray__default['default'](result.toArray()) && allowMultiple ? result.toArray() : result[0]; // Allow custom extractor to skip default cleaner
// for this type; defaults to true
if (defaultCleaner && Cleaners[type]) {
- return Cleaners[type](result, _objectSpread({}, opts, extractionOpts));
+ return Cleaners[type](result, _objectSpread$5(_objectSpread$5({}, opts), extractionOpts));
}
return result;
}
function selectExtendedTypes(extend, opts) {
+ var _context7;
+
var results = {};
- _Reflect$ownKeys(extend).forEach(function (t) {
+ _forEachInstanceProperty__default['default'](_context7 = _Reflect$ownKeys__default['default'](extend)).call(_context7, function (t) {
if (!results[t]) {
- results[t] = select(_objectSpread({}, opts, {
+ results[t] = select(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: t,
extractionOpts: extend[t]
}));
@@ -7458,7 +8062,7 @@ function extractResult(opts) {
extractor = opts.extractor,
_opts$fallback = opts.fallback,
fallback = _opts$fallback === void 0 ? true : _opts$fallback;
- var result = select(_objectSpread({}, opts, {
+ var result = select(_objectSpread$5(_objectSpread$5({}, opts), {}, {
extractionOpts: extractor[type]
})); // If custom parser succeeds, return the result
@@ -7481,12 +8085,12 @@ var RootExtractor = {
extractedTitle = _opts.extractedTitle; // This is the generic extractor. Run its extract method
if (extractor.domain === '*') return extractor.extract(opts);
- opts = _objectSpread({}, opts, {
+ opts = _objectSpread$5(_objectSpread$5({}, opts), {}, {
extractor: extractor
});
if (contentOnly) {
- var _content = extractResult(_objectSpread({}, opts, {
+ var _content = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'content',
extractHtml: true,
title: extractedTitle
@@ -7497,46 +8101,46 @@ var RootExtractor = {
};
}
- var title = extractResult(_objectSpread({}, opts, {
+ var title = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'title'
}));
- var date_published = extractResult(_objectSpread({}, opts, {
+ var date_published = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'date_published'
}));
- var author = extractResult(_objectSpread({}, opts, {
+ var author = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'author'
}));
- var next_page_url = extractResult(_objectSpread({}, opts, {
+ var next_page_url = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'next_page_url'
}));
- var content = extractResult(_objectSpread({}, opts, {
+ var content = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'content',
extractHtml: true,
title: title
}));
- var lead_image_url = extractResult(_objectSpread({}, opts, {
+ var lead_image_url = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'lead_image_url',
content: content
}));
- var excerpt = extractResult(_objectSpread({}, opts, {
+ var excerpt = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'excerpt',
content: content
}));
- var dek = extractResult(_objectSpread({}, opts, {
+ var dek = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'dek',
content: content,
excerpt: excerpt
}));
- var word_count = extractResult(_objectSpread({}, opts, {
+ var word_count = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'word_count',
content: content
}));
- var direction = extractResult(_objectSpread({}, opts, {
+ var direction = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'direction',
title: title
}));
- var _ref3 = extractResult(_objectSpread({}, opts, {
+ var _ref3 = extractResult(_objectSpread$5(_objectSpread$5({}, opts), {}, {
type: 'url_and_domain'
})) || {
url: null,
@@ -7551,7 +8155,7 @@ var RootExtractor = {
extendedResults = selectExtendedTypes(extractor.extend, opts);
}
- return _objectSpread({
+ return _objectSpread$5({
title: title,
content: content,
author: author,
@@ -7568,18 +8172,20 @@ var RootExtractor = {
}
};
+function ownKeys$6(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$6(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context4; _forEachInstanceProperty__default['default'](_context4 = ownKeys$6(Object(source), true)).call(_context4, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context5; _forEachInstanceProperty__default['default'](_context5 = ownKeys$6(Object(source))).call(_context5, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
function collectAllPages(_x) {
return _collectAllPages.apply(this, arguments);
}
function _collectAllPages() {
- _collectAllPages = _asyncToGenerator(
- /*#__PURE__*/
- _regeneratorRuntime.mark(function _callee(_ref) {
- var next_page_url, html, $, metaCache, result, Extractor, title, url, pages, previousUrls, extractorOpts, nextPageResult, word_count;
- return _regeneratorRuntime.wrap(function _callee$(_context) {
+ _collectAllPages = _asyncToGenerator__default['default']( /*#__PURE__*/_regeneratorRuntime__default['default'].mark(function _callee(_ref) {
+ var next_page_url, html, $, metaCache, result, Extractor, title, url, pages, previousUrls, _context, _context2, extractorOpts, nextPageResult, word_count;
+
+ return _regeneratorRuntime__default['default'].wrap(function _callee$(_context3) {
while (1) {
- switch (_context.prev = _context.next) {
+ switch (_context3.prev = _context3.next) {
case 0:
next_page_url = _ref.next_page_url, html = _ref.html, $ = _ref.$, metaCache = _ref.metaCache, result = _ref.result, Extractor = _ref.Extractor, title = _ref.title, url = _ref.url;
// At this point, we've fetched just the first page
@@ -7589,42 +8195,41 @@ function _collectAllPages() {
case 3:
if (!(next_page_url && pages < 26)) {
- _context.next = 16;
+ _context3.next = 16;
break;
}
pages += 1; // eslint-disable-next-line no-await-in-loop
- _context.next = 7;
+ _context3.next = 7;
return Resource.create(next_page_url);
case 7:
- $ = _context.sent;
+ $ = _context3.sent;
html = $.html();
extractorOpts = {
url: next_page_url,
html: html,
$: $,
metaCache: metaCache,
- contentOnly: true,
extractedTitle: title,
previousUrls: previousUrls
};
nextPageResult = RootExtractor.extract(Extractor, extractorOpts);
previousUrls.push(next_page_url);
- result = _objectSpread({}, result, {
- content: "".concat(result.content, "\n")
});
- return _context.abrupt("return", _objectSpread({}, result, {
+ return _context3.abrupt("return", _objectSpread$6(_objectSpread$6({}, result), {}, {
total_pages: pages,
pages_rendered: pages,
word_count: word_count
@@ -7632,84 +8237,65 @@ function _collectAllPages() {
case 18:
case "end":
- return _context.stop();
+ return _context3.stop();
}
}
- }, _callee, this);
+ }, _callee);
}));
return _collectAllPages.apply(this, arguments);
}
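// The _context3.next / _context3.sent pairs above are regenerator's compiled
// form of await, with each case label acting as a resume point:
//
// _context3.next = 7;
// return Resource.create(next_page_url); // ~ await Resource.create(...)
// // case 7:
// $ = _context3.sent;                    // ~ $ = <result of the await>
//
// Net effect: while a next_page_url exists (capped at 26 pages), fetch it,
// re-run RootExtractor on it, append its content, and return the merged
// result with total_pages, pages_rendered and a recomputed word_count.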
+function ownKeys$7(object, enumerableOnly) { var keys = _Object$keys__default['default'](object); if (_Object$getOwnPropertySymbols__default['default']) { var symbols = _Object$getOwnPropertySymbols__default['default'](object); if (enumerableOnly) symbols = _filterInstanceProperty__default['default'](symbols).call(symbols, function (sym) { return _Object$getOwnPropertyDescriptor__default['default'](object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread$7(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { var _context3; _forEachInstanceProperty__default['default'](_context3 = ownKeys$7(Object(source), true)).call(_context3, function (key) { _defineProperty__default['default'](target, key, source[key]); }); } else if (_Object$getOwnPropertyDescriptors__default['default']) { _Object$defineProperties__default['default'](target, _Object$getOwnPropertyDescriptors__default['default'](source)); } else { var _context4; _forEachInstanceProperty__default['default'](_context4 = ownKeys$7(Object(source))).call(_context4, function (key) { _Object$defineProperty__default['default'](target, key, _Object$getOwnPropertyDescriptor__default['default'](source, key)); }); } } return target; }
var Mercury = {
- parse: function () {
- var _parse = _asyncToGenerator(
- /*#__PURE__*/
- _regeneratorRuntime.mark(function _callee(url) {
- var _ref,
- html,
- opts,
- _opts$fetchAllPages,
- fetchAllPages,
- _opts$fallback,
- fallback,
- _opts$contentType,
- contentType,
- _opts$headers,
- headers,
- extend,
- customExtractor,
- parsedUrl,
- $,
- Extractor,
- metaCache,
- extendedTypes,
- result,
- _result,
- title,
- next_page_url,
- turndownService,
- _args = arguments;
-
- return _regeneratorRuntime.wrap(function _callee$(_context) {
+ parse: function parse(url) {
+ var _arguments = arguments;
+ return _asyncToGenerator__default['default']( /*#__PURE__*/_regeneratorRuntime__default['default'].mark(function _callee() {
+ var _context;
+
+ var _ref, html, opts, _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, _opts$contentType, contentType, _opts$headers, headers, extend, customExtractor, parsedUrl, $, Extractor, metaCache, extendedTypes, result, _result, title, next_page_url, turndownService;
+
+ return _regeneratorRuntime__default['default'].wrap(function _callee$(_context2) {
while (1) {
- switch (_context.prev = _context.next) {
+ switch (_context2.prev = _context2.next) {
case 0:
- _ref = _args.length > 1 && _args[1] !== undefined ? _args[1] : {}, html = _ref.html, opts = _objectWithoutProperties(_ref, ["html"]);
+ _ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts = _objectWithoutProperties__default['default'](_ref, ["html"]);
_opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === void 0 ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === void 0 ? true : _opts$fallback, _opts$contentType = opts.contentType, contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType, _opts$headers = opts.headers, headers = _opts$headers === void 0 ? {} : _opts$headers, extend = opts.extend, customExtractor = opts.customExtractor; // if no url was passed and this is the browser version,
// set url to window.location.href and load the html
// from the current page
- if (!url && cheerio.browser) {
+ if (!url && cheerio__default['default'].browser) {
url = window.location.href; // eslint-disable-line no-undef
- html = html || cheerio.html();
+ html = html || cheerio__default['default'].html();
}
- parsedUrl = URL.parse(url);
+ parsedUrl = URL__default['default'].parse(url);
if (validateUrl(parsedUrl)) {
- _context.next = 6;
+ _context2.next = 6;
break;
}
- return _context.abrupt("return", {
+ return _context2.abrupt("return", {
error: true,
message: 'The url parameter passed does not look like a valid URL. Please check your URL and try again.'
});
case 6:
- _context.next = 8;
+ _context2.next = 8;
return Resource.create(url, html, parsedUrl, headers);
case 8:
- $ = _context.sent;
+ $ = _context2.sent;
if (!$.failed) {
- _context.next = 11;
+ _context2.next = 11;
break;
}
- return _context.abrupt("return", $);
+ return _context2.abrupt("return", $);
case 11:
// Add custom extractor via cli.
@@ -7727,7 +8313,7 @@ var Mercury = {
// Used when extracting title/author/date_published/dek
- metaCache = $('meta').map(function (_, node) {
+ metaCache = _mapInstanceProperty__default['default'](_context = $('meta')).call(_context, function (_, node) {
return $(node).attr('name');
}).toArray();
extendedTypes = {};
@@ -7752,11 +8338,11 @@ var Mercury = {
_result = result, title = _result.title, next_page_url = _result.next_page_url; // Fetch more pages if next_page_url found
if (!(fetchAllPages && next_page_url)) {
- _context.next = 25;
+ _context2.next = 25;
break;
}
- _context.next = 22;
+ _context2.next = 22;
return collectAllPages({
Extractor: Extractor,
next_page_url: next_page_url,
@@ -7769,49 +8355,44 @@ var Mercury = {
});
case 22:
- result = _context.sent;
- _context.next = 26;
+ result = _context2.sent;
+ _context2.next = 26;
break;
case 25:
- result = _objectSpread({}, result, {
+ result = _objectSpread$7(_objectSpread$7({}, result), {}, {
total_pages: 1,
rendered_pages: 1
});
case 26:
if (contentType === 'markdown') {
- turndownService = new TurndownService();
+ turndownService = new TurndownService__default['default']();
result.content = turndownService.turndown(result.content);
} else if (contentType === 'text') {
result.content = $.text($(result.content));
}
- return _context.abrupt("return", _objectSpread({}, result, extendedTypes));
+ return _context2.abrupt("return", _objectSpread$7(_objectSpread$7({}, result), extendedTypes));
case 28:
case "end":
- return _context.stop();
+ return _context2.stop();
}
}
- }, _callee, this);
- }));
-
- function parse(_x) {
- return _parse.apply(this, arguments);
- }
-
- return parse;
- }(),
- browser: !!cheerio.browser,
+ }, _callee);
+ }))();
+ },
+ browser: !!cheerio__default['default'].browser,
// A convenience method for getting a resource
// to work with, e.g., for custom extractor generator
fetchResource: function fetchResource(url) {
return Resource.create(url);
},
- addExtractor: function addExtractor$$1(extractor) {
+ addExtractor: function addExtractor$1(extractor) {
return addExtractor(extractor);
}
};
module.exports = Mercury;
+//# sourceMappingURL=mercury.js.map
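// Example usage of the rebuilt module (require path assumed; every option
// shown is destructured inside parse() above):
//
// const Mercury = require('./dist/mercury');
// Mercury.parse('https://example.com/article', {
//   fetchAllPages: true,     // follow next_page_url links (default true)
//   contentType: 'markdown', // or 'html' (default) or 'text'
//   headers: {},             // extra headers for the fetch
// }).then(result => console.log(result.title, result.word_count));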
diff --git a/dist/mercury.js.map b/dist/mercury.js.map
index 2d067a31d..c4702242e 100644
--- a/dist/mercury.js.map
+++ b/dist/mercury.js.map
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a <div> to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into <p> tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^|]|$)|»([^|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive <br> tags\nexport const BR_TAGS_RE = new RegExp('(<br[^>]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp('<br[^>]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist; this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression, both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n CANDIDATES_WHITELIST,\n CANDIDATES_BLACKLIST,\n} from './constants';\n\nexport default function stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each((index, node) => {\n const $node = $(node);\n const classes = $node.attr('class');\n const id = $node.attr('id');\n if (!id && !classes) return;\n\n const classAndId = `${classes || ''} ${id || ''}`;\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love 
it. - AP\n\n// Given cheerio object, convert consecutive <br> tags into\n// <p> tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const $element = $(element);\n const nextElement = $element.next().get(0);\n\n if (nextElement && nextElement.tagName.toLowerCase() === 'br') {\n collapsing = true;\n $element.remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('<p></p>')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. does\n// not contain any other block tags.)\n//\n// If the node is a <br>, it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('<p></p>');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting these criteria:\n// * Multiple consecutive <br> tags.\n// * <div> tags without block level elements inside of them\n// * <span> tags who are not children of <p> or <div> tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const attrs = getAttrs(node) || {};\n // console.log(attrs)\n\n const attribString = Reflect.ownKeys(attrs)\n .map(key => `${key}=${attrs[key]}`)\n .join(' ');\n let html;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html = node.tagName.toLowerCase() === 'noscript' ? 
$node.text() : $node.html();\n } else {\n html = $node.contents();\n }\n $node.replaceWith(\n `<${tag} ${attribString}>${html}</${tag}>`\n );\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import URL from 'url';\n\nimport {\n KEEP_SELECTORS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function markToKeep(article, $, url, tags = []) {\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n const { protocol, hostname } = URL.parse(url);\n tags = [...tags, `iframe[src^=\"${protocol}//${hostname}\"]`];\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return $;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not(`.${KEEP_CLASS}`).remove();\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there are fewer than 3 of them (<3),\n// strip them. Otherwise, turn 'em into H2s.\nexport default function cleanHOnes(article, $) {\n const $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each((index, node) => $(node).remove());\n } else {\n $hOnes.each((index, node) => {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n","import {\n getAttrs,\n setAttrs,\n} from 'utils/dom';\n\nimport {\n WHITELIST_ATTRS_RE,\n KEEP_CLASS,\n} from './constants';\n\nfunction removeAllButWhitelist($article, $) {\n $article.find('*').each((index, node) => {\n const attrs = getAttrs(node);\n\n setAttrs(node, Reflect.ownKeys(attrs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: attrs[attr] };\n }\n\n return acc;\n }, {}));\n });\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, $article).removeClass(KEEP_CLASS);\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article, $) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ? 
$article.parent() : $article,\n $,\n );\n}\n","export default function removeEmpty($article, $) {\n $article.find('p').each((index, p) => {\n const $p = $(p);\n if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a <div> to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into <p> tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive <br> tags\nexport const BR_TAGS_RE = new RegExp('(<br[^>]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp('<br[^>]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist; this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) {\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. 
Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName.toLowerCase() === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName.toLowerCase() === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('<div></div>');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling.get(0) === $candidate.get(0)) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n if (wrappingDiv.children().length === 1 &&\n wrappingDiv.children().first().get(0) === $candidate.get(0)) {\n return $candidate;\n }\n\n return wrappingDiv;\n}\n","import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\nimport mergeSiblings from './merge-siblings';\n\n// After we've calculated scores, loop through all of the possible\n// candidate nodes we found and find the one with the highest score.\nexport default function findTopCandidate($) {\n let $candidate;\n let topScore = 0;\n\n $('[score]').each((index, node) => {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {\n return;\n }\n\n const $node = $(node);\n const score = getScore($node);\n\n if (score > topScore) {\n 
topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n","// Scoring\nexport { default as getWeight } from './get-weight';\nexport { default as getScore } from './get-score';\nexport { default as scoreCommas } from './score-commas';\nexport { default as scoreLength } from './score-length';\nexport { default as scoreParagraph } from './score-paragraph';\nexport { default as setScore } from './set-score';\nexport { default as addScore } from './add-score';\nexport { default as addToParent } from './add-to-parent';\nexport { default as getOrInitScore } from './get-or-init-score';\nexport { default as scoreNode } from './score-node';\nexport { default as scoreContent } from './score-content';\nexport { default as findTopCandidate } from './find-top-candidate';\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport {\n CLEAN_CONDITIONALLY_TAGS,\n KEEP_CLASS,\n} from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName.toLowerCase();\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n // If marked to keep, skip it\n if ($node.hasClass(KEEP_CLASS) || $node.find(`.${KEEP_CLASS}`).length > 0) return;\n\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","import { getWeight } from 'extractors/generic/content/scoring';\n\nimport { HEADER_TAG_LIST } from './constants';\nimport { normalizeSpaces } from '../text';\n\nexport default function cleanHeaders($article, $, title = '') {\n $(HEADER_TAG_LIST, $article).each((index, header) => {\n const $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\nexport default function rewriteTopLevel(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo($('html'), $, 'div');\n $ = convertNodeTo($('body'), $, 'div');\n\n return $;\n}\n","import URL from 'url';\n\nimport {\n getAttrs,\n setAttr,\n} from 'utils/dom';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const attrs = getAttrs(node);\n const url = attrs[attr];\n\n if (url) {\n const absoluteUrl = URL.resolve(rootUrl, url);\n setAttr(node, attr, absoluteUrl);\n }\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { stripTags } from 'utils/dom';\n\n// Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\nexport default function extractFromMeta(\n $,\n metaNames,\n cachedNames,\n cleanTags = true\n) {\n const foundNames = metaNames.filter(name => cachedNames.indexOf(name) !== -1);\n\n for (const name of foundNames) {\n const type = 'name';\n const 
value = 'value';\n\n const nodes = $(`meta[${type}=\"${name}\"]`);\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n const values =\n nodes.map((index, node) => $(node).attr(value))\n .toArray()\n .filter(text => text !== '');\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. Skip this name\n // also.\n if (values.length === 1) {\n let metaValue;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags) {\n metaValue = stripTags(values[0], $);\n } else {\n metaValue = values[0];\n }\n\n return metaValue;\n }\n }\n\n // If nothing is found, return null\n return null;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a list of selectors, find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`<span>${text}</span>`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const attrs = getAttrs(parent);\n const { class: nodeClass, id } = attrs;\n const classAndId = `${nodeClass} ${id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","import { IS_WP_SELECTOR } from './constants';\n\nexport default function isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n","export default function getAttrs(node) {\n const { attribs, attributes } = node;\n\n if (!attribs && attributes) {\n const attrs = Reflect.ownKeys(attributes).reduce((acc, index) => {\n const attr = attributes[index];\n\n if (!attr.name || !attr.value) return acc;\n\n acc[attr.name] = attr.value;\n return acc;\n }, {});\n return attrs;\n }\n\n return attribs;\n}\n","export default function setAttr(node, attr, val) {\n if (node.attribs) {\n node.attribs[attr] = val;\n } else if (node.attributes) {\n node.setAttribute(attr, val);\n }\n\n return node;\n}\n","export default function setAttrs(node, attrs) {\n if (node.attribs) {\n node.attribs = attrs;\n } else if (node.attributes) {\n while (node.attributes.length > 0) {\n node.removeAttribute(node.attributes[0].name);\n }\n\n Reflect.ownKeys(attrs).forEach((key) => {\n node.setAttribute(key, attrs[key]);\n });\n }\n\n return node;\n}\n","// DOM manipulation\nexport { default as stripUnlikelyCandidates } from './strip-unlikely-candidates';\nexport { default as brsToPs } from './brs-to-ps';\nexport { default as paragraphize } from './paragraphize';\nexport { default as convertToParagraphs } from './convert-to-paragraphs';\nexport { default as convertNodeTo } from './convert-node-to';\nexport { default as cleanImages } from './clean-images';\nexport { default as markToKeep } from './mark-to-keep';\nexport { default as stripJunkTags } from './strip-junk-tags';\nexport { default as cleanHOnes } from './clean-h-ones';\nexport { default as cleanAttributes } from './clean-attributes';\nexport { default as removeEmpty } from './remove-empty';\nexport { default as cleanTags } from './clean-tags';\nexport { default as cleanHeaders } from './clean-headers';\nexport { default as rewriteTopLevel } from './rewrite-top-level';\nexport { default as makeLinksAbsolute } from './make-links-absolute';\nexport { textLength, linkDensity } from './link-density';\nexport { default as extractFromMeta } from './extract-from-meta';\nexport { default as extractFromSelectors } from './extract-from-selectors';\nexport { default as stripTags } from './strip-tags';\nexport { default as withinComment } from './within-comment';\nexport { default as nodeIsSufficient } from './node-is-sufficient';\nexport { default as isWordpress } from './is-wordpress';\nexport { default as getAttrs } from './get-attrs';\nexport { default as setAttr } from './set-attr';\nexport { default as setAttrs } from './set-attrs';\n","export const IS_LINK = new RegExp('https?://', 'i');\nexport const IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nexport const TAGS_TO_REMOVE = [\n 'script',\n 'style',\n 'form',\n].join(',');\n","import { getAttrs } from 'utils/dom';\n\nimport {\n IS_LINK,\n IS_IMAGE,\n} from './constants';\n\n// Convert 
all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nexport default function convertLazyLoadedImages($) {\n $('img').each((_, img) => {\n const attrs = getAttrs(img);\n\n Reflect.ownKeys(attrs).forEach((attr) => {\n const value = attrs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) &&\n IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n","import { TAGS_TO_REMOVE } from './constants';\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root().find('*')\n .contents()\n .filter(isComment)\n .remove();\n\n return $;\n}\n\nexport default function clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n","import cheerio from 'cheerio';\nimport iconv from 'iconv-lite';\n\nimport { getEncoding } from 'utils/text';\nimport { fetchResource } from './utils';\nimport {\n normalizeMetaTags,\n convertLazyLoadedImages,\n clean,\n} from './utils/dom';\n\nconst Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. Expects a\n // string.\n async create(url, preparedResponse, parsedUrl) {\n let result;\n\n if (preparedResponse) {\n const validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500,\n },\n };\n\n result = { body: preparedResponse, response: validResponse };\n } else {\n result = await fetchResource(url, parsedUrl);\n }\n\n if (result.error) {\n result.failed = true;\n return result;\n }\n\n return this.generateDoc(result);\n },\n\n generateDoc({ body: content, response }) {\n const { 'content-type': contentType } = response.headers;\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n if (!contentType.includes('html') &&\n !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n let $ = this.encodeDoc({ content, contentType });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n },\n\n encodeDoc({ content, contentType }) {\n const encoding = getEncoding(contentType);\n let decodedContent = iconv.decode(content, encoding);\n let $ = cheerio.load(decodedContent);\n\n // after first cheerio.load, check to see if encoding matches\n const metaContentType = $('meta[http-equiv=content-type]').attr('content');\n const properEncoding = getEncoding(metaContentType);\n\n // if encodings in the header/body dont match, use the one in the body\n if (properEncoding !== encoding) {\n decodedContent = iconv.decode(content, properEncoding);\n $ = cheerio.load(decodedContent);\n }\n\n return $;\n },\n};\n\nexport default Resource;\n","const merge = (extractor, domains) => (\n domains.reduce((acc, domain) => {\n acc[domain] = extractor;\n return acc;\n }, {})\n);\n\nexport default function mergeSupportedDomains(extractor) {\n return extractor.supportedDomains ?\n merge(extractor, [extractor.domain, 
...extractor.supportedDomains])\n :\n merge(extractor, [extractor.domain]);\n}\n","export const BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: [\n '.post-content noscript',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n ],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div',\n },\n },\n\n author: {\n selectors: [\n '.post-author-name',\n ],\n },\n\n title: {\n selectors: [\n '.post h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n 'span.publishdate',\n ],\n },\n};\n","export const NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: [\n 'div.article-content',\n 'section.body',\n 'article.article',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n '.ad',\n '.single-related-story',\n ],\n\n // Object of transformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation).\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: ($node, $) => {\n const $children = $.browser ? $($node.text()) : $node.children();\n if ($children.length === 1 && $children.get(0) !== undefined &&\n $children.get(0).tagName.toLowerCase() === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n },\n\n title: {\n selectors: [\n 'h1.lede-feature-title',\n 'h1.headline-primary',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n '.by-authors',\n '.lede-feature-author',\n ],\n },\n\n dek: {\n selectors: [\n '.lede-feature-teaser',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.article-timestamp[datetime]', 'datetime'],\n 'time.article-timestamp',\n ],\n },\n};\n","export const WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: [\n '#mw-content-text',\n ],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': ($node) => {\n const $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure',\n },\n\n // Selectors to remove from the extracted content\n clean: [\n '.mw-editsection',\n 'figure tr, figure td, figure tbody',\n '#toc',\n '.navbox',\n ],\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: [\n 'h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n '#footer-info-lastmod',\n ],\n },\n\n};\n","export const TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': ($node, $) => {\n const tweets = $node.find('.tweet');\n const $tweetContainer = $('<div></div>');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with <s>, which\n // renders as a strikethrough\n s: 'span',\n },\n\n selectors: [\n '.permalink[role=main]',\n ],\n\n defaultCleaner: false,\n\n clean: [\n '.stream-item-footer',\n 
'button',\n '.tweet-details-fixer',\n ],\n },\n\n author: {\n selectors: [\n '.tweet.permalink-tweet .username',\n ],\n },\n\n date_published: {\n selectors: [\n ['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms'],\n // '.tweet.permalink-tweet .metadata',\n ],\n },\n\n};\n","export const NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: [\n 'h1.g-headline',\n 'h1[itemprop=\"headline\"]',\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n '.g-byline',\n '.byline',\n ],\n },\n\n content: {\n selectors: [\n 'div.g-blocks',\n 'article#story',\n ],\n\n transforms: {\n 'img.g-lazy': ($node) => {\n let src = $node.attr('src');\n // const widths = $node.attr('data-widths')\n // .slice(1)\n // .slice(0, -1)\n // .split(',');\n // if (widths.length) {\n // width = widths.slice(-1);\n // } else {\n // width = '900';\n // }\n const width = 640;\n\n src = src.replace('{{size}}', width);\n $node.attr('src', src);\n },\n },\n\n clean: [\n '.ad',\n 'header#story-header',\n '.story-body-1 .lede.video',\n '.visually-hidden',\n '#newsletter-promo',\n '.promo',\n '.comments-button',\n '.hidden',\n '.comments',\n '.supplemental',\n '.nocontent',\n '.story-footer-links',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\nexport const TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: [\n 'h1.hed',\n ],\n },\n\n author: {\n selectors: [\n 'article#article .article-cover-extra .metadata .byline a',\n ],\n },\n\n content: {\n selectors: [\n ['.article-cover figure.lead-img', '.article-body'],\n '.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.partner-box',\n '.callout',\n ],\n },\n\n date_published: {\n selectors: [\n ['time[itemProp=\"datePublished\"]', 'datetime'],\n ],\n },\n\n lead_image_url: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n '.contributors',\n ],\n },\n\n content: {\n selectors: [\n 'div#articleBody',\n 'div.articleBody',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
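// Several fields above (date_published, lead_image_url, author) use a
// two-element array instead of a bare selector: ['css-selector', 'attribute'].
// The pair form reads the named attribute rather than the element's text.
// A minimal sketch of that lookup, as a hypothetical helper:
function selectField($, selector) {
  if (Array.isArray(selector)) {
    const [cssSelector, attr] = selector;
    return $(cssSelector).first().attr(attr);
  }
  return $(selector).first().text();
}

// e.g. selectField($, ['meta[name="author"]', 'value']) returns the meta value,
// while selectField($, '.byline') returns the byline's text content.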
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ['time[itemProp=\"datePublished\"]', 'content'],\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.dek',\n 'h2.dek',\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[rel=\"author\"]',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'article.content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.visually-hidden',\n 'figcaption img.photo',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: [\n 'h1',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.authorname-txt',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'div.richtext',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'span.caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n 'span.time',\n ],\n },\n\n lead_image_url: {\n selectors: [\n\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: [\n 'header.canvas-header',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.provider-name',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.figure-caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time.date[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter dek selectors\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: [\n 'h1[id=\"post-title\"]',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[data-action=\"user/username\"]', 'byline__author',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n ['.longform_custom_header_media', '#buzz_sub_buzz'],\n '#buzz_sub_buzz',\n ],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n h2: 'b',\n\n 'div.longform_custom_header_media': ($node) => {\n if ($node.has('img') && $node.has('.longform_header_image_source')) {\n return 'figure';\n }\n\n return null;\n },\n\n 'figure.longform_custom_header_media .longform_header_image_source':\n 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.instapaper_ignore',\n '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline',\n '.share-box',\n '.print',\n ],\n },\n\n date_published: {\n selectors: [\n '.buzz-datetime',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: [\n 'h1.entry-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n '.author vcard', '.fn',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '.grid-content',\n '.entry-content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro',\n '.content-wrapper',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n '.story-main-content .byline .vcard',\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content',\n '.content-group', '.story-core',\n '.story-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'figcaption',\n ],\n },\n\n date_published: {\n selectors: [\n ['.story-main-content .timestamp time[datetime]', 'datetime'],\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value'],\n\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","export const DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: [\n 'jezebel.com',\n 'lifehacker.com',\n 'kotaku.com',\n 'gizmodo.com',\n 'jalopnik.com',\n 'kinja.com',\n ],\n\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n content: {\n selectors: [\n '.post-content',\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
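// DeadspinExtractor above declares supportedDomains so one extractor can serve
// the whole Kinja network. Consistent with the mergeSupportedDomains fragment
// at the top of this section, registration reduces each domain onto the same
// extractor object (the merge helper shown here is a sketch of that shape):
const merge = (extractor, domains) =>
  domains.reduce((acc, domain) => {
    acc[domain] = extractor;
    return acc;
  }, {});

function mergeSupportedDomains(extractor) {
  return extractor.supportedDomains
    ? merge(extractor, [extractor.domain, ...extractor.supportedDomains])
    : merge(extractor, [extractor.domain]);
}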
E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': ($node) => {\n const youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.magnifier',\n '.lightbox',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.updated[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: [\n 'h1.article-title',\n ],\n },\n\n author: {\n selectors: [\n 'span[itemprop=author]',\n ],\n },\n\n content: {\n selectors: [\n 'div[itemprop=articlebody]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=datePublished]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.PostByline__name',\n ],\n },\n\n content: {\n selectors: [\n 'div.post__content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': ($node, $) => {\n const data = JSON.parse($node.attr('data-props'));\n const { src } = data.sources[0];\n const $img = $('').attr('src', src);\n $node.replaceWith($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['.PostByline__timestamp[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const MediumExtractor = {\n domain: 'medium.com',\n\n supportedDomains: [\n 'trackchanges.postlight.com',\n ],\n\n title: {\n selectors: [\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.section-content'],\n '.section-content',\n 'article > div > section',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: ($node) => {\n const ytRe =\n /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n const thumb = decodeURIComponent($node.attr('data-thumbnail'));\n\n if (ytRe.test(thumb)) {\n const [_, youtubeId] = thumb.match(ytRe) // eslint-disable-line\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n const $parent = $node.parents('figure');\n const $caption = $parent.find('figcaption');\n $parent.empty().append([$node, $caption]);\n }\n },\n\n // rewrite figures to pull out image and caption, remove rest\n figure: ($node) => {\n // ignore if figure has an iframe\n if ($node.find('iframe').length > 0) return;\n\n const $img = $node.find('img').slice(-1)[0];\n const $caption = $node.find('figcaption');\n $node.empty().append([$img, $caption]);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const WwwTmzComExtractor = {\n domain: 'www.tmz.com',\n\n title: {\n selectors: [\n '.post-title-breadcrumb',\n 'h1',\n '.headline',\n ],\n },\n\n author: 'TMZ STAFF',\n\n date_published: {\n selectors: [\n '.article-posted-date',\n ],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-content',\n '.all-post-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
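// The ApartmentTherapy LazyPicture transform above can be exercised in
// isolation with cheerio. A minimal sketch with made-up markup, assuming the
// replacement element (whose markup was stripped from the `$('')` call in this
// bundle) is a bare <img />:
const cheerio = require('cheerio');

const html = `
  <div data-render-react-id="images/LazyPicture"
       data-props='{"sources":[{"src":"https://example.com/photo.jpg"}]}'></div>`;
const $ = cheerio.load(html);
const $node = $('div[data-render-react-id="images/LazyPicture"]');

// same steps as the transform: parse the JSON props, lift out the first src
const data = JSON.parse($node.attr('data-props'));
const { src } = data.sources[0];
$node.replaceWith($('<img />').attr('src', src));
// $.html() now contains <img src="https://example.com/photo.jpg">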
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.lightbox-link',\n ],\n },\n};\n","export const WwwWashingtonpostComExtractor = {\n domain: 'www.washingtonpost.com',\n\n title: {\n selectors: [\n 'h1',\n '#topper-headline-wrapper',\n ],\n },\n\n author: {\n selectors: [\n '.pb-byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['.pb-timestamp[itemprop=\"datePublished\"]', 'content'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.inline-content': ($node) => {\n if ($node.has('img,iframe,video').length > 0) {\n return 'figure';\n }\n\n $node.remove();\n return null;\n },\n '.pb-caption': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.interstitial-link',\n '.newsletter-inline-unit',\n ],\n },\n};\n","export const WwwHuffingtonpostComExtractor = {\n domain: 'www.huffingtonpost.com',\n\n title: {\n selectors: [\n 'h1.headline__title',\n ],\n },\n\n author: {\n selectors: [\n 'span.author-card__details__name',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:modified_time\"]', 'value'],\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n 'h2.headline__subtitle',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.entry__body',\n ],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // 'div.top-media': ($node) => {\n // const $figure = $node.children('figure');\n // $node.replaceWith($figure);\n // },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.pull-quote',\n '.tag-cloud',\n '.embed-asset',\n '.below-entry',\n '.entry-corrections',\n '#suggested-story',\n ],\n },\n};\n","export const NewrepublicComExtractor = {\n domain: 'newrepublic.com',\n\n title: {\n selectors: [\n 'h1.article-headline',\n '.minutes-primary h1.minute-title',\n ],\n },\n\n author: {\n selectors: [\n 'div.author-list',\n '.minutes-primary h3.minute-byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n 'h2.article-subhead',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.article-cover', 'div.content-body'],\n ['.minute-image', '.minutes-primary div.content-body'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
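// date_published entries like TMZ's above pair a selector with a timezone for
// sites that print local times without an offset. A sketch of how such a
// string can be resolved with moment-timezone (already required by this
// bundle); the raw string and format here are assumptions:
const moment = require('moment-timezone');

const raw = '3/21/2018 7:00 AM';
const published = moment.tz(raw, 'M/D/YYYY h:mm A', 'America/Los_Angeles');
// the offset comes from the configured zone, not from the string itself
console.log(published.toISOString());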
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'aside',\n ],\n },\n};\n","export const MoneyCnnComExtractor = {\n domain: 'money.cnn.com',\n\n title: {\n selectors: [\n '.article-title',\n ],\n },\n\n author: {\n selectors: [\n '.byline a',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"date\"]', 'value'],\n ],\n\n timezone: 'GMT',\n },\n\n dek: {\n selectors: [\n '#storytext h2',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '#storytext',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.inStoryHeading',\n ],\n },\n};\n","export const WwwThevergeComExtractor = {\n domain: 'www.theverge.com',\n\n supportedDomains: ['www.polygon.com'],\n\n title: {\n selectors: [\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n 'h2.p-dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n // feature template multi-match\n ['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'],\n // regular post multi-match\n ['.e-image--hero', '.c-entry-content'],\n // feature template fallback\n '.l-wrapper .l-feature',\n // regular post fallback\n 'div.c-entry-content',\n ],\n\n // Transform lazy-loaded images\n transforms: {\n noscript: ($node) => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'span';\n }\n\n return null;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.aside',\n 'img.c-dynamic-image', // images come from noscript transform\n ],\n },\n};\n","export const WwwCnnComExtractor = {\n domain: 'www.cnn.com',\n\n title: {\n selectors: [\n 'h1.pg-headline',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n '.metadata__byline__author',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"pubdate\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n // a more specific selector to grab the lead image and the body\n ['.media__video--thumbnail', '.zn-body-text'],\n // a fallback for the above\n '.zn-body-text',\n 'div[itemprop=\"articleBody\"]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
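// The Verge config above shows the multi-match selector form: an entry that is
// itself an array only matches when every listed selector is present, and the
// matched nodes are stitched together in order (lead image + intro + body).
// A rough sketch of that check, as a hypothetical helper:
function multiMatch($, selectorList) {
  if (!selectorList.every((selector) => $(selector).length > 0)) {
    return null; // fall through to the next, simpler selector entry
  }
  return selectorList.map((selector) => $.html($(selector).first())).join('');
}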
E.g., unusual lazy loaded images\n transforms: {\n '.zn-body__paragraph, .el__leafmedia--sourced-paragraph': ($node) => {\n const $text = $node.html();\n if ($text) {\n return 'p';\n }\n\n return null;\n },\n\n // this transform cleans the short, all-link sections linking\n // to related content but not marked as such in any way.\n '.zn-body__paragraph': ($node) => {\n if ($node.has('a')) {\n if ($node.text().trim() === $node.find('a').text().trim()) {\n $node.remove();\n }\n }\n },\n\n '.media__video--thumbnail': 'figure',\n\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n ],\n },\n};\n","export const WwwAolComExtractor = {\n domain: 'www.aol.com',\n\n title: {\n selectors: [\n 'h1.p-article__title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n '.p-article__byline__date',\n ],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwYoutubeComExtractor = {\n domain: 'www.youtube.com',\n\n title: {\n selectors: [\n '.watch-title',\n 'h1.watch-title-container',\n ],\n },\n\n author: {\n selectors: [\n '.yt-user-info',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemProp=\"datePublished\"]', 'value'],\n ],\n\n timezone: 'GMT',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n defaultCleaner: false,\n\n selectors: [\n ['#player-api', '#eow-description'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '#player-api': ($node, $) => {\n const videoId = $('meta[itemProp=\"videoId\"]').attr('value');\n $node.html(`\n `\n );\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwTheguardianComExtractor = {\n domain: 'www.theguardian.com',\n\n title: {\n selectors: [\n '.content__headline',\n ],\n },\n\n author: {\n selectors: [\n 'p.byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.content__standfirst',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.content__article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.hide-on-mobile',\n '.inline-icon',\n ],\n },\n};\n","export const WwwSbnationComExtractor = {\n domain: 'www.sbnation.com',\n\n title: {\n selectors: [\n 'h1.c-page-title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n 'h2.c-entry-summary.p-dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.c-entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwBloombergComExtractor = {\n domain: 'www.bloomberg.com',\n\n title: {\n selectors: [\n // normal articles\n '.lede-headline',\n\n // /graphics/ template\n 'h1.article-title',\n\n // /news/ template\n 'h1.lede-text-only__hed',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"parsely-author\"]', 'value'],\n '.byline-details__link',\n\n // /graphics/ template\n '.bydek',\n\n // /news/ template\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.published-at', 'datetime'],\n ['time[datetime]', 'datetime'],\n ['meta[name=\"date\"]', 'value'],\n ['meta[name=\"parsely-pub-date\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-body__content',\n\n // /graphics/ template\n ['section.copy-block'],\n\n // /news/ template\n '.body-copy',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.inline-newsletter',\n '.page-ad',\n ],\n },\n};\n","export const WwwBustleComExtractor = {\n domain: 'www.bustle.com',\n\n title: {\n selectors: [\n 'h1.post-page__title',\n ],\n },\n\n author: {\n selectors: [\n 'div.content-meta__author',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.content-meta__published-date[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.post-page__body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwNprOrgExtractor = {\n domain: 'www.npr.org',\n\n title: {\n selectors: [\n 'h1',\n '.storytitle',\n ],\n },\n\n author: {\n selectors: [\n 'p.byline__name.byline__name--block',\n ],\n },\n\n date_published: {\n selectors: [\n ['.dateblock time[datetime]', 'datetime'],\n ['meta[name=\"date\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ['meta[name=\"twitter:image:src\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.storytext',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.bucketwrap.image': 'figure',\n '.bucketwrap.image .credit-caption': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'div.enlarge_measure',\n ],\n },\n};\n","export const WwwRecodeNetExtractor = {\n domain: 'www.recode.net',\n\n title: {\n selectors: [\n 'h1.c-page-title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n 'h2.c-entry-summary.p-dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['figure.e-image--hero', '.c-entry-content'],\n '.c-entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const QzComExtractor = {\n domain: 'qz.com',\n\n title: {\n selectors: [\n 'header.item-header.content-width-responsive',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n '.timestamp',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['figure.featured-image', '.item-body'],\n '.item-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.article-aside',\n '.progressive-image-thumbnail',\n ],\n },\n};\n","export const WwwDmagazineComExtractor = {\n domain: 'www.dmagazine.com',\n\n title: {\n selectors: [\n 'h1.story__title',\n ],\n },\n\n author: {\n selectors: [\n '.story__info .story__info__item:first-child',\n ],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n '.story__info',\n ],\n\n timezone: 'America/Chicago',\n },\n\n dek: {\n selectors: [\n '.story__subhead',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['article figure a:first-child', 'href'],\n ],\n },\n\n content: {\n selectors: [\n '.story__content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwReutersComExtractor = {\n domain: 'www.reuters.com',\n\n title: {\n selectors: [\n 'h1.article-headline',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"og:article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '#article-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.article-subtitle': 'h4',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '#article-byline .author',\n // 'span.location',\n // 'span.articleLocation',\n ],\n },\n};\n","export const MashableComExtractor = {\n domain: 'mashable.com',\n\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n 'span.author_name a',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"og:article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'section.article-content.blueprint',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.image-credit': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwChicagotribuneComExtractor = {\n domain: 'www.chicagotribune.com',\n\n title: {\n selectors: [\n 'h1.trb_ar_hl_t',\n ],\n },\n\n author: {\n selectors: [\n 'span.trb_ar_by_nm_au',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.trb_ar_page',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwVoxComExtractor = {\n domain: 'www.vox.com',\n\n title: {\n selectors: [\n 'h1.c-page-title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.p-dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['figure.e-image--hero', '.c-entry-content'],\n '.c-entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'figure .e-image__image noscript': ($node) => {\n const imgHtml = $node.html();\n $node.parents('.e-image__image').find('.c-dynamic-image').replaceWith(imgHtml);\n },\n\n 'figure .e-image__meta': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const NewsNationalgeographicComExtractor = {\n domain: 'news.nationalgeographic.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.main-title',\n ],\n },\n\n author: {\n selectors: [\n '.byline-component__contributors b span',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n format: 'ddd MMM DD HH:mm:ss zz YYYY',\n timezone: 'EST',\n },\n\n dek: {\n selectors: [\n '.article__deck',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.parsys.content', '.__image-lead__'],\n '.content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.parsys.content': ($node, $) => {\n const $imgSrc = $node.find('.image.parbase.section')\n .find('.picturefill')\n .first()\n .data('platform-src');\n if ($imgSrc) {\n $node.prepend($(``));\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.pull-quote.pull-quote--large',\n ],\n },\n};\n","export const WwwNationalgeographicComExtractor = {\n domain: 'www.nationalgeographic.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.main-title',\n ],\n },\n\n author: {\n selectors: [\n '.byline-component__contributors b span',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.article__deck',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.parsys.content', '.__image-lead__'],\n '.content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n '.parsys.content': ($node, $) => {\n const $imageParent = $node.children().first();\n if ($imageParent.hasClass('imageGroup')) {\n const $dataAttrContainer = $imageParent.find('.media--medium__container').children().first();\n const imgPath1 = $dataAttrContainer.data('platform-image1-path');\n const imgPath2 = $dataAttrContainer.data('platform-image2-path');\n if (imgPath2 && imgPath1) {\n $node.prepend($(`
<div class=\"__image-lead__\">\n <img src=\"${imgPath2}\"/>\n <img src=\"${imgPath1}\"/>\n </div>
`));\n }\n } else {\n const $imgSrc = $node.find('.image.parbase.section')\n .find('.picturefill')\n .first()\n .data('platform-src');\n if ($imgSrc) {\n $node.prepend($(``));\n }\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.pull-quote.pull-quote--small',\n ],\n },\n};\n","export const WwwLatimesComExtractor = {\n domain: 'www.latimes.com',\n\n title: {\n selectors: [\n '.trb_ar_hl',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.trb_ar_main',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.trb_ar_la': ($node) => {\n const $figure = $node.find('figure');\n $node.replaceWith($figure);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.trb_ar_by',\n '.trb_ar_cr',\n ],\n },\n};\n","export const PagesixComExtractor = {\n domain: 'pagesix.com',\n\n supportedDomains: [\n 'nypost.com',\n ],\n\n title: {\n selectors: [\n 'h1 a',\n ],\n },\n\n author: {\n selectors: [\n '.byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['#featured-image-wrapper', '.entry-content'],\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '#featured-image-wrapper': 'figure',\n '.wp-caption-text': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.modal-trigger',\n ],\n },\n};\n","export const ThefederalistpapersOrgExtractor = {\n domain: 'thefederalistpapers.org',\n\n title: {\n selectors: [\n 'h1.entry-title',\n ],\n },\n\n author: {\n selectors: [\n 'main span.entry-author-name',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n ['p[style]'],\n ],\n },\n};\n","export const WwwCbssportsComExtractor = {\n domain: 'www.cbssports.com',\n\n title: {\n selectors: [\n '.article-headline',\n ],\n },\n\n author: {\n selectors: [\n '.author-name',\n ],\n },\n\n date_published: {\n selectors: [\n ['.date-original-reading-time time', 'datetime'],\n ],\n timezone: 'UTC',\n },\n\n dek: {\n selectors: [\n '.article-subline',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwMsnbcComExtractor = {\n domain: 'www.msnbc.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.is-title-pane',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"DC.date.issued\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.pane-node-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.pane-node-body': ($node, $) => {\n const [selector, attr] = WwwMsnbcComExtractor.lead_image_url.selectors[0];\n const src = $(selector).attr(attr);\n if (src) {\n $node.prepend(``);\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwThepoliticalinsiderComExtractor = {\n domain: 'www.thepoliticalinsider.com',\n\n title: {\n selectors: [\n ['meta[name=\"sailthru.title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"sailthru.author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"sailthru.date\"]', 'value'],\n ],\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'], // enter selectors\n ],\n },\n\n content: {\n selectors: [\n 'div#article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwMentalflossComExtractor = {\n domain: 'www.mentalfloss.com',\n\n title: {\n selectors: [\n 'h1.title',\n '.title-group',\n '.inner',\n ],\n },\n\n author: {\n selectors: [\n '.field-name-field-enhanced-authors',\n ],\n },\n\n date_published: {\n selectors: [\n '.date-display-single',\n ],\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.field.field-name-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const AbcnewsGoComExtractor = {\n domain: 'abcnews.go.com',\n\n title: {\n selectors: [\n '.article-header h1',\n ],\n },\n\n author: {\n selectors: [\n '.authors',\n ],\n clean: [\n '.author-overlay',\n '.by-text',\n ],\n },\n\n date_published: {\n selectors: [\n '.timestamp',\n ],\n timezone: 'America/New_York',\n\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-copy',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwNydailynewsComExtractor = {\n domain: 'www.nydailynews.com',\n\n title: {\n selectors: [\n 'h1#ra-headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"parsely-author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"sailthru.date\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'article#ra-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'dl#ra-tags',\n '.ra-related',\n 'a.ra-editor',\n 'dl#ra-share-bottom',\n ],\n },\n};\n","export const WwwCnbcComExtractor = {\n domain: 'www.cnbc.com',\n\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div#article_body.content',\n 'div.story',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
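// clean is not limited to content: AbcnewsGoComExtractor above scrubs
// '.author-overlay' and '.by-text' out of the author match before its text is
// read. A sketch of that per-field cleanup, as a hypothetical helper:
function cleanField($, $match, cleanSelectors = []) {
  cleanSelectors.forEach((selector) => {
    $match.find(selector).remove();
  });
  return $match.text().trim();
}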
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwPopsugarComExtractor = {\n domain: 'www.popsugar.com',\n\n title: {\n selectors: [\n 'h2.post-title',\n 'title-text',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"article:author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '#content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.share-copy-title',\n '.post-tags',\n '.reactions',\n ],\n },\n};\n","export const ObserverComExtractor = {\n domain: 'observer.com',\n\n title: {\n selectors: [\n 'h1.entry-title',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n '.vcard',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n 'h2.dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const PeopleComExtractor = {\n domain: 'people.com',\n\n title: {\n selectors: [\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n 'a.author.url.fn',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.article-body__inner',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwUsmagazineComExtractor = {\n domain: 'www.usmagazine.com',\n\n title: {\n selectors: [\n 'header h1',\n ],\n },\n\n author: {\n selectors: [\n 'a.article-byline.tracked-offpage',\n ],\n },\n\n date_published: {\n timezone: 'America/New_York',\n\n selectors: [\n 'time.article-published-date',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.article-body-inner',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.module-related',\n ],\n },\n};\n","export const WwwRollingstoneComExtractor = {\n domain: 'www.rollingstone.com',\n\n title: {\n selectors: [\n 'h1.content-title',\n ],\n },\n\n author: {\n selectors: [\n 'a.content-author.tracked-offpage',\n ],\n },\n\n date_published: {\n selectors: [\n 'time.content-published-date',\n ],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n '.content-description',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.lead-container', '.article-content'],\n '.article-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.module-related',\n ],\n },\n};\n","export const twofortysevensportsComExtractor = {\n domain: '247sports.com',\n\n title: {\n selectors: [\n 'title',\n 'article header h1',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['time[data-published]', 'data-published'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'section.body.article',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const UproxxComExtractor = {\n domain: 'uproxx.com',\n\n title: {\n selectors: [\n 'div.post-top h1',\n ],\n },\n\n author: {\n selectors: [\n '.post-top .authorname',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.post-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.image': 'figure',\n 'div.image .wp-media-credit': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwEonlineComExtractor = {\n domain: 'www.eonline.com',\n\n title: {\n selectors: [\n 'h1.article__title',\n ],\n },\n\n author: {\n selectors: [\n '.entry-meta__author a',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.post-content section, .post-content div.post-content__image'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div.post-content__image': 'figure',\n 'div.post-content__image .image__credits': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwMiamiheraldComExtractor = {\n domain: 'www.miamiherald.com',\n\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n date_published: {\n selectors: [\n 'p.published-date',\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.dateline-storybody',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwRefinery29ComExtractor = {\n domain: 'www.refinery29.com',\n\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n '.contributor',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"sailthru.date\"]', 'value'],\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.full-width-opener', '.article-content'],\n '.article-content',\n '.body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.loading noscript': ($node) => {\n const imgHtml = $node.html();\n $node.parents('.loading').replaceWith(imgHtml);\n },\n\n '.section-image': 'figure',\n\n '.section-image .content-caption': 'figcaption',\n\n '.section-text': 'p',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.story-share',\n ],\n },\n};\n","export const WwwMacrumorsComExtractor = {\n domain: 'www.macrumors.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n '.author-url',\n ],\n },\n\n date_published: {\n selectors: [\n '.article .byline',\n ],\n\n // Wednesday January 18, 2017 11:44 am PST\n format: 'dddd MMMM D, YYYY h:mm A zz',\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
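// When a byline has a fixed shape, a moment format string pins the parse down;
// WwwMacrumorsComExtractor above documents its own sample string in a comment.
// A sketch using that declared format (note: moment ignores the zz timezone
// abbreviation while parsing, so the explicit zone argument supplies the offset):
const moment = require('moment-timezone');

const byline = 'Wednesday January 18, 2017 11:44 am PST';
const published = moment.tz(
  byline,
  'dddd MMMM D, YYYY h:mm A zz',
  'America/Los_Angeles'
);
console.log(published.format());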
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwAndroidcentralComExtractor = {\n domain: 'www.androidcentral.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.main-title',\n ],\n },\n\n author: {\n selectors: [\n '.meta-by',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['.image-large', 'src'],\n ],\n },\n\n content: {\n selectors: [\n '.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.intro',\n 'blockquote',\n ],\n },\n};\n","export const WwwSiComExtractor = {\n domain: 'www.si.com',\n\n title: {\n selectors: [\n 'h1',\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n '.timestamp',\n ],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n '.quick-hit ul',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['p', '.marquee_large_2x', '.component.image'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n\n noscript: ($node) => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n [\n '.inline-thumb',\n '.primary-message',\n '.description',\n '.instructions',\n ],\n ],\n },\n};\n","export const WwwRawstoryComExtractor = {\n domain: 'www.rawstory.com',\n\n title: {\n selectors: [\n '.blog-title',\n ],\n },\n\n author: {\n selectors: [\n '.blog-author a:first-of-type',\n ],\n },\n\n date_published: {\n selectors: [\n '.blog-author a:last-of-type',\n ],\n\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.blog-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwCnetComExtractor = {\n domain: 'www.cnet.com',\n\n title: {\n selectors: [\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n 'a.author',\n ],\n },\n\n date_published: {\n selectors: [\n 'time',\n ],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n '.article-dek',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['img.__image-lead__', '.article-main-body'],\n '.article-main-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'figure.image': ($node) => {\n const $img = $node.find('img');\n $img.attr('width', '100%');\n $img.attr('height', '100%');\n $img.addClass('__image-lead__');\n $node.remove('.imgContainer').prepend($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwCinemablendComExtractor = {\n domain: 'www.cinemablend.com',\n\n title: {\n selectors: [\n '.story_title',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div#wrap_left_content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwTodayComExtractor = {\n domain: 'www.today.com',\n\n title: {\n selectors: [\n 'h1.entry-headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"DC.date.issued\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry-container',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.label-comment',\n ],\n },\n};\n","export const WwwHowtogeekComExtractor = {\n domain: 'www.howtogeek.com',\n\n title: {\n selectors: [\n 'title',\n ],\n },\n\n author: {\n selectors: [\n '#authorinfobox a',\n ],\n },\n\n date_published: {\n selectors: [\n '#authorinfobox + div li',\n ],\n timezone: 'GMT',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.thecontent',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwAlComExtractor = {\n domain: 'www.al.com',\n\n title: {\n selectors: [\n ['meta[name=\"title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"article_author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article_date_original\"]', 'value'],\n ],\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwThepennyhoarderComExtractor = {\n domain: 'www.thepennyhoarder.com',\n\n title: {\n selectors: [\n ['meta[name=\"dcterms.title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n ['link[rel=\"author\"]', 'title'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.post-img', '.post-text'],\n '.post-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwWesternjournalismComExtractor = {\n domain: 'www.westernjournalism.com',\n\n title: {\n selectors: [\n 'title',\n 'h1.entry-title',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"DC.date.issued\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.subtitle',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.article-sharing.top + div',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.ad-notice-small',\n ],\n },\n};\n","export const FusionNetExtractor = {\n domain: 'fusion.net',\n\n title: {\n selectors: [\n '.post-title',\n '.single-title',\n '.headline',\n ],\n },\n\n author: {\n selectors: [\n '.show-for-medium .byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.local-time', 'datetime'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.post-featured-media', '.article-content'],\n '.article-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n '.fusion-youtube-oembed': 'figure',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwAmericanowComExtractor = {\n domain: 'www.americanow.com',\n\n title: {\n selectors: [\n '.title',\n ['meta[name=\"title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n '.byline',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"publish_date\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.article-content', '.image', '.body'],\n '.body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.article-video-wrapper',\n '.show-for-small-only',\n ],\n },\n};\n","export const ScienceflyComExtractor = {\n domain: 'sciencefly.com',\n\n title: {\n selectors: [\n '.entry-title',\n '.cb-entry-title',\n '.cb-single-title',\n ],\n },\n\n author: {\n selectors: [\n 'div.cb-author',\n 'div.cb-author-title',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['div.theiaPostSlider_slides img', 'src'],\n ],\n },\n\n content: {\n selectors: [\n 'div.theiaPostSlider_slides',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const HellogigglesComExtractor = {\n domain: 'hellogiggles.com',\n\n title: {\n selectors: [\n '.title',\n ],\n },\n\n author: {\n selectors: [\n '.author-link',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const ThoughtcatalogComExtractor = {\n domain: 'thoughtcatalog.com',\n\n title: {\n selectors: [\n 'h1.title',\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n 'div.col-xs-12.article_header div.writer-container.writer-container-inline.writer-no-avatar h4.writer-name',\n 'h1.writer-name',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry.post',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.tc_mark',\n ],\n },\n};\n","export const WwwNjComExtractor = {\n domain: 'www.nj.com',\n\n title: {\n selectors: [\n ['meta[name=\"title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"article_author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article_date_original\"]', 'value'],\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwInquisitrComExtractor = {\n domain: 'www.inquisitr.com',\n\n title: {\n selectors: [\n 'h1.entry-title.story--header--title',\n ],\n },\n\n author: {\n selectors: [\n 'div.story--header--author',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'article.story',\n '.entry-content.',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.post-category',\n '.story--header--socials',\n '.story--header--content',\n ],\n },\n};\n","export const WwwNbcnewsComExtractor = {\n domain: 'www.nbcnews.com',\n\n title: {\n selectors: [\n 'div.article-hed h1',\n ],\n },\n\n author: {\n selectors: [\n 'span.byline_author',\n ],\n },\n\n date_published: {\n selectors: [\n ['.flag_article-wrapper time.timestamp_article[datetime]', 'datetime'],\n '.flag_article-wrapper time',\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n 'div.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const FortuneComExtractor = {\n domain: 'fortune.com',\n\n title: {\n selectors: [\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n '.MblGHNMJ',\n ],\n\n timezone: 'UTC',\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['picture', 'article.row'],\n 'article.row',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const WwwLinkedinComExtractor = {\n domain: 'www.linkedin.com',\n\n title: {\n selectors: [\n '.article-title',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"article:author\"]', 'value'],\n '.entity-name a[rel=author]',\n ],\n },\n\n date_published: {\n selectors: [\n ['time[itemprop=\"datePublished\"]', 'datetime'],\n ],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['header figure', '.prose'],\n '.prose',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.entity-image',\n ],\n },\n};\n","export const ObamawhitehouseArchivesGovExtractor = {\n domain: 'obamawhitehouse.archives.gov',\n\n supportedDomains: [\n 'whitehouse.gov',\n ],\n\n title: {\n selectors: [\n 'h1',\n '.pane-node-title',\n ],\n },\n\n author: {\n selectors: [\n '.blog-author-link',\n '.node-person-name-link',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n '.field-name-field-forall-summary',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n defaultCleaner: false,\n\n selectors: [\n 'div#content-start',\n '.pane-node-field-forall-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.pane-node-title',\n '.pane-custom.pane-1',\n ],\n },\n};\n","export const WwwOpposingviewsComExtractor = {\n domain: 'www.opposingviews.com',\n\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n 'div.date span span a',\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"publish_date\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.article-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.show-for-small-only',\n ],\n },\n};\n","export const WwwProspectmagazineCoUkExtractor = {\n domain: 'www.prospectmagazine.co.uk',\n\n title: {\n selectors: [\n '.page-title',\n ],\n },\n\n author: {\n selectors: [\n '.aside_author .title',\n ],\n },\n\n date_published: {\n selectors: [\n '.post-info',\n ],\n\n timezone: 'Europe/London',\n },\n\n dek: {\n selectors: [\n '.page-subtitle',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n // ['article.type-post div.post_content p'],\n 'article .post_content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n};\n","export const ForwardComExtractor = {\n domain: 'forward.com',\n\n title: {\n selectors: [\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n '.author-name',\n ['meta[name=\"sailthru.author\"]', 'value'],\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"date\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n ['.post-item-media-wrap', '.post-item p'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.donate-box',\n '.message',\n '.subtitle',\n ],\n },\n};\n","export const WwwQdailyComExtractor = {\n domain: 'www.qdaily.com',\n\n title: {\n selectors: [\n 'h2',\n 'h2.title',\n ],\n },\n\n author: {\n selectors: [\n '.name',\n ],\n },\n\n date_published: {\n selectors: [\n ['.date.smart-date', 'data-origindate'],\n ],\n },\n\n dek: {\n selectors: [\n '.excerpt',\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['.article-detail-hd img', 'src'],\n ],\n },\n\n content: {\n selectors: [\n '.detail',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n  transforms: {\n  },\n\n  // Is there anything that is in the result that shouldn't be?\n  // The clean selectors will remove anything that matches from\n  // the result\n  clean: [\n    '.lazyload',\n    '.lazylad',\n    '.lazylood',\n  ],\n  },\n};\n","export const GothamistComExtractor = {\n  domain: 'gothamist.com',\n\n  supportedDomains: [\n    'chicagoist.com',\n    'laist.com',\n    'sfist.com',\n    'shanghaiist.com',\n    'dcist.com',\n  ],\n\n  title: {\n    selectors: [\n      'h1',\n      '.entry-header h1',\n    ],\n  },\n\n  author: {\n    selectors: [\n      '.author',\n    ],\n  },\n\n  date_published: {\n    selectors: [\n      'abbr',\n      'abbr.published',\n    ],\n\n    timezone: 'America/New_York',\n  },\n\n  dek: {\n    selectors: [\n      null,\n    ],\n  },\n\n  lead_image_url: {\n    selectors: [\n      ['meta[name=\"og:image\"]', 'value'],\n    ],\n  },\n\n  content: {\n    selectors: [\n      '.entry-body',\n    ],\n\n    // Is there anything in the content you selected that needs transformed\n    // before it's consumable content? E.g., unusual lazy loaded images\n    transforms: {\n      'div.image-none': 'figure',\n      '.image-none i': 'figcaption',\n      'div.image-left': 'figure',\n      '.image-left i': 'figcaption',\n      'div.image-right': 'figure',\n      '.image-right i': 'figcaption',\n    },\n\n    // Is there anything that is in the result that shouldn't be?\n    // The clean selectors will remove anything that matches from\n    // the result\n    clean: [\n      '.image-none br',\n      '.image-left br',\n      '.image-right br',\n      '.galleryEase',\n    ],\n  },\n};\n","export const WwwFoolComExtractor = {\n  domain: 'www.fool.com',\n\n  title: {\n    selectors: [\n      'h1',\n    ],\n  },\n\n  author: {\n    selectors: [\n      '.author-inline .author-name',\n    ],\n  },\n\n  date_published: {\n    selectors: [\n      ['meta[name=\"date\"]', 'value'],\n    ],\n  },\n\n  dek: {\n    selectors: [\n      'header h2',\n    ],\n  },\n\n  lead_image_url: {\n    selectors: [\n      ['meta[name=\"og:image\"]', 'value'],\n    ],\n  },\n\n  content: {\n    selectors: [\n      '.article-content',\n    ],\n\n    // Is there anything in the content you selected that needs transformed\n    // before it's consumable content? E.g., unusual lazy loaded images\n    transforms: {\n      '.caption img': ($node) => {\n        const src = $node.attr('src');\n        $node.parent().replaceWith(`<figure><img src=\"${src}\" /></figure>`);\n      },\n      '.caption': 'figcaption',\n    },\n\n    // Is there anything that is in the result that shouldn't be?\n    // The clean selectors will remove anything that matches from\n    // the result\n    clean: [\n      '#pitch',\n    ],\n  },\n};\n","export const WwwSlateComExtractor = {\n  domain: 'www.slate.com',\n\n  title: {\n    selectors: [\n      '.hed',\n      'h1',\n    ],\n  },\n\n  author: {\n    selectors: [\n      'a[rel=author]',\n    ],\n  },\n\n  date_published: {\n    selectors: [\n      '.pub-date',\n    ],\n\n    timezone: 'America/New_York',\n  },\n\n  dek: {\n    selectors: [\n      '.dek',\n    ],\n  },\n\n  lead_image_url: {\n    selectors: [\n      ['meta[name=\"og:image\"]', 'value'],\n    ],\n  },\n\n  content: {\n    selectors: [\n      '.body',\n    ],\n\n    // Is there anything in the content you selected that needs transformed\n    // before it's consumable content? 
E.g., unusual lazy loaded images\n  transforms: {\n  },\n\n  // Is there anything that is in the result that shouldn't be?\n  // The clean selectors will remove anything that matches from\n  // the result\n  clean: [\n    '.about-the-author',\n    '.pullquote',\n    '.newsletter-signup-component',\n    '.top-comment',\n  ],\n  },\n};\n","export const IciRadioCanadaCaExtractor = {\n  domain: 'ici.radio-canada.ca',\n\n  title: {\n    selectors: [\n      'h1',\n    ],\n  },\n\n  author: {\n    selectors: [\n      ['meta[name=\"dc.creator\"]', 'value'],\n    ],\n  },\n\n  date_published: {\n    selectors: [\n      ['meta[name=\"dc.date.created\"]', 'value'],\n    ],\n\n    timezone: 'America/New_York',\n  },\n\n  dek: {\n    selectors: [\n      '.bunker-component.lead',\n    ],\n  },\n\n  lead_image_url: {\n    selectors: [\n      ['meta[name=\"og:image\"]', 'value'],\n    ],\n  },\n\n  content: {\n    selectors: [\n      ['.main-multimedia-item', '.news-story-content'],\n    ],\n\n    // Is there anything in the content you selected that needs transformed\n    // before it's consumable content? E.g., unusual lazy loaded images\n    transforms: {\n    },\n\n    // Is there anything that is in the result that shouldn't be?\n    // The clean selectors will remove anything that matches from\n    // the result\n    clean: [\n\n    ],\n  },\n};\n","import mergeSupportedDomains from 'utils/merge-supported-domains';\nimport * as CustomExtractors from './custom/index';\n\nexport default Object.keys(CustomExtractors).reduce((acc, key) => {\n  const extractor = CustomExtractors[key];\n  return {\n    ...acc,\n    ...mergeSupportedDomains(extractor),\n  };\n}, {});\n","// CLEAN AUTHOR CONSTANTS\nexport const CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n  //  author = re.sub(r'^\\s*(posted |written )?by\\s*:?\\s*(.*)(?i)',\n\n// CLEAN DEK CONSTANTS\nexport const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistently enough. Two options were:\n//  - og:description\n//  - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\nexport const DEK_META_TAGS = [\n];\n\n// An ordered list of Selectors to find likely article deks. 
From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\nexport const DEK_SELECTORS = [\n '.entry-summary',\n];\n\n// CLEAN DATE PUBLISHED CONSTANTS\nexport const MS_DATE_STRING = /^\\d{13}$/i;\nexport const SEC_DATE_STRING = /^\\d{10}$/i;\nexport const CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nexport const TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nexport const TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nconst months = [\n 'jan',\n 'feb',\n 'mar',\n 'apr',\n 'may',\n 'jun',\n 'jul',\n 'aug',\n 'sep',\n 'oct',\n 'nov',\n 'dec',\n];\nconst allMonths = months.join('|');\nconst timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nconst timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nconst timestamp3 = '-[0-9]{3,4}$';\nexport const SPLIT_DATE_STRING =\n new RegExp(`(${timestamp1})|(${timestamp2})|(${timestamp3})|([0-9]{1,4})|(${allMonths})`, 'ig');\n\n// 2016-11-22T08:57-500\n// Check if datetime string has an offset at the end\nexport const TIME_WITH_OFFSET_RE = /-\\d{3,4}$/;\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nexport const TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nexport const DOMAIN_ENDINGS_RE =\n new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n","import { normalizeSpaces } from 'utils/text';\nimport { CLEAN_AUTHOR_RE } from './constants';\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nexport default function cleanAuthor(author) {\n return normalizeSpaces(\n author.replace(CLEAN_AUTHOR_RE, '$2').trim()\n );\n}\n","import validUrl from 'valid-url';\n\nexport default function clean(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n","import { stripTags } from 'utils/dom';\nimport {\n excerptContent,\n normalizeSpaces,\n} from 'utils/text';\n\nimport { TEXT_LINK_RE } from './constants';\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nexport default function cleanDek(dek, { $, excerpt }) {\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10)) return null;\n\n const dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. 
If we have some, it's\n  // not a good dek - bail.\n  if (TEXT_LINK_RE.test(dekText)) return null;\n\n  return normalizeSpaces(dekText.trim());\n}\n","import moment from 'moment-timezone';\nimport parseFormat from 'moment-parseformat';\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nimport {\n  MS_DATE_STRING,\n  SEC_DATE_STRING,\n  CLEAN_DATE_STRING_RE,\n  SPLIT_DATE_STRING,\n  TIME_MERIDIAN_SPACE_RE,\n  TIME_MERIDIAN_DOTS_RE,\n  TIME_WITH_OFFSET_RE,\n} from './constants';\n\nexport function cleanDateString(dateString) {\n  return (dateString.match(SPLIT_DATE_STRING) || [])\n    .join(' ')\n    .replace(TIME_MERIDIAN_DOTS_RE, 'm')\n    .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')\n    .replace(CLEAN_DATE_STRING_RE, '$1')\n    .trim();\n}\n\nexport function createDate(dateString, timezone, format) {\n  if (TIME_WITH_OFFSET_RE.test(dateString)) {\n    return moment(new Date(dateString));\n  }\n\n  return timezone ?\n    moment.tz(dateString, format || parseFormat(dateString), timezone) :\n    moment(dateString, format || parseFormat(dateString));\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nexport default function cleanDatePublished(dateString, { timezone, format } = {}) {\n  // If string is in milliseconds or seconds, convert to int and return\n  if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n    return new Date(parseInt(dateString, 10)).toISOString();\n  }\n\n  let date = createDate(dateString, timezone, format);\n\n  if (!date.isValid()) {\n    dateString = cleanDateString(dateString);\n    date = createDate(dateString, timezone, format);\n  }\n\n  return date.isValid() ? date.toISOString() : null;\n}\n","import {\n  cleanAttributes,\n  cleanHeaders,\n  cleanHOnes,\n  cleanImages,\n  cleanTags,\n  removeEmpty,\n  rewriteTopLevel,\n  markToKeep,\n  stripJunkTags,\n  makeLinksAbsolute,\n} from 'utils/dom';\n\n// Clean our article content, returning a new, cleaned node.\nexport default function extractCleanNode(\n  article,\n  {\n    $,\n    cleanConditionally = true,\n    title = '',\n    url = '',\n    defaultCleaner = true,\n  }\n) {\n  // Rewrite the tag name to div if it's a top level node like body or\n  // html to avoid later complications with multiple body tags.\n  rewriteTopLevel(article, $);\n\n  // Drop small images and spacer images\n  // Only do this if defaultCleaner is set to true;\n  // this can sometimes be too aggressive.\n  if (defaultCleaner) cleanImages(article, $);\n\n  // Make links absolute\n  makeLinksAbsolute(article, $, url);\n\n  // Mark elements to keep that would normally be removed.\n  // E.g., stripJunkTags will remove iframes, so we're going to mark\n  // YouTube/Vimeo videos as elements we want to keep.\n  markToKeep(article, $, url);\n\n  // Drop certain tags like <title>, etc\n  // This is -mostly- for cleanliness, not security.\n  stripJunkTags(article, $);\n\n  // H1 tags are typically the article title, which should be extracted\n  // by the title extractor instead. If there's less than 3 of them (<3),\n  // strip them. Otherwise, turn 'em into H2s.\n  cleanHOnes(article, $);\n\n  // Clean headers\n  cleanHeaders(article, $, title);\n\n  // We used to clean UL's and OL's here, but it was leading to\n  // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes(article, $);\n\n return article;\n}\n","import { stripTags } from 'utils/dom';\nimport { normalizeSpaces } from 'utils/text';\n\nimport { TITLE_SPLITTERS_RE } from './constants';\nimport { resolveSplitTitle } from './index';\n\nexport default function cleanTitle(title, { url, $ }) {\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n const h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return normalizeSpaces(stripTags(title, $).trim());\n}\n","import URL from 'url';\nimport wuzzy from 'wuzzy';\n\nimport {\n TITLE_SPLITTERS_RE,\n DOMAIN_ENDINGS_RE,\n} from './constants';\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n const termCounts = splitTitle.reduce((acc, titleText) => {\n acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n const [maxTerm, termCount] =\n Reflect.ownKeys(termCounts)\n .reduce((acc, key) => {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]);\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n const splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n const longestEnd = splitEnds.reduce((acc, end) => acc.length > end.length ? acc : end, '');\n\n if (longestEnd.length > 10) {\n return longestEnd;\n }\n\n return text;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. 
Not the end of the world if it doesn't strip right.\n const { host } = URL.parse(url);\n const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n const startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nexport default function resolveSplitTitle(title, url = '') {\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n const splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n let newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n","import cleanAuthor from './author';\nimport cleanImage from './lead-image-url';\nimport cleanDek from './dek';\nimport cleanDatePublished from './date-published';\nimport cleanContent from './content';\nimport cleanTitle from './title';\n\nconst Cleaners = {\n author: cleanAuthor,\n lead_image_url: cleanImage,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: cleanContent,\n title: cleanTitle,\n};\n\nexport default Cleaners;\n\nexport { cleanAuthor };\nexport { cleanImage };\nexport { cleanDek };\nexport { cleanDatePublished };\nexport { cleanContent };\nexport { cleanTitle };\nexport { default as resolveSplitTitle } from './resolve-split-title';\n","import {\n stripUnlikelyCandidates,\n convertToParagraphs,\n} from 'utils/dom';\n\nimport {\n scoreContent,\n findTopCandidate,\n} from './scoring';\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nexport default function extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? – AP\n // let $root = $.root().clone()\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs($);\n $ = scoreContent($, opts.weightNodes);\n const $topCandidate = findTopCandidate($);\n\n return $topCandidate;\n}\n","import cheerio from 'cheerio';\n\nimport { nodeIsSufficient } from 'utils/dom';\nimport { cleanContent } from 'cleaners';\nimport { normalizeSpaces } from 'utils/text';\n\nimport extractBestNode from './extract-best-node';\n\nconst GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true,\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. 
On each failure, retry with slightly more lax opts.\n  //\n  // :param return_type: string. If \"node\", should return the content\n  // as a cheerio node rather than as an HTML string.\n  //\n  // Opts:\n  // stripUnlikelyCandidates: Remove any elements that match\n  // non-article-like criteria first. (Like, does this element\n  // have a classname of \"comment\")\n  //\n  // weightNodes: Modify an element's score based on whether it has\n  // certain classNames or IDs. Examples: Subtract if a node has\n  // a className of 'comment', Add if a node has an ID of\n  // 'entry-content'.\n  //\n  // cleanConditionally: Clean the node to be returned of some\n  // superfluous content. Things like forms, ads, etc.\n  extract({ $, html, title, url }, opts) {\n    opts = { ...this.defaultOpts, ...opts };\n\n    $ = $ || cheerio.load(html);\n\n    // Cascade through our extraction-specific opts in an ordered fashion,\n    // turning them off as we try to extract content.\n    let node = this.getContentNode($, title, url, opts);\n\n    if (nodeIsSufficient(node)) {\n      return this.cleanAndReturnNode(node, $);\n    }\n\n    // We didn't succeed on first pass, one by one disable our\n    // extraction opts and try again.\n    for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {\n      opts[key] = false;\n      $ = cheerio.load(html);\n\n      node = this.getContentNode($, title, url, opts);\n\n      if (nodeIsSufficient(node)) {\n        break;\n      }\n    }\n\n    return this.cleanAndReturnNode(node, $);\n  },\n\n  // Get node given current options\n  getContentNode($, title, url, opts) {\n    return cleanContent(\n      extractBestNode($, opts),\n      {\n        $,\n        cleanConditionally: opts.cleanConditionally,\n        title,\n        url,\n      });\n  },\n\n  // Once we got here, either we're at our last-resort node, or\n  // we broke early. Make sure we at least have -something- before we\n  // move forward.\n  cleanAndReturnNode(node, $) {\n    if (!node) {\n      return null;\n    }\n\n    return normalizeSpaces($.html(node));\n\n    // if return_type == \"html\":\n    //     return normalize_spaces(node_to_html(node))\n    // else:\n    //     return node\n  },\n\n};\n\nexport default GenericContentExtractor;\n","// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than <meta title>.\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n  'tweetmeme-title',\n  'dc.title',\n  'rbtitle',\n  'headline',\n  'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nexport const WEAK_TITLE_META_TAGS = [\n  'og:title',\n];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nexport const STRONG_TITLE_SELECTORS = [\n '.hentry .entry-title',\n 'h1#articleHeader',\n 'h1.articleHeader',\n 'h1.article',\n '.instapaper_title',\n '#meebo-title',\n];\n\nexport const WEAK_TITLE_SELECTORS = [\n 'article h1',\n '#entry-title',\n '.entry-title',\n '#entryTitle',\n '#entrytitle',\n '.entryTitle',\n '.entrytitle',\n '#articleTitle',\n '.articleTitle',\n 'post post-title',\n 'h1.title',\n 'h2.article',\n 'h1',\n 'html head title',\n 'title',\n];\n","import { cleanTitle } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n STRONG_TITLE_META_TAGS,\n WEAK_TITLE_META_TAGS,\n STRONG_TITLE_SELECTORS,\n WEAK_TITLE_SELECTORS,\n} from './constants';\n\nconst GenericTitleExtractor = {\n extract({ $, url, metaCache }) {\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n let title;\n\n title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors($, STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Last, look for weaker selector tags that may match.\n title = extractFromSelectors($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // If no matches, return an empty string\n return '';\n },\n};\n\nexport default GenericTitleExtractor;\n","// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nexport const AUTHOR_META_TAGS = [\n 'byl',\n 'clmst',\n 'dc.author',\n 'dcsext.author',\n 'dc.creator',\n 'rbauthors',\n 'authors',\n];\n\nexport const AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of XPath Selectors to find likely article authors. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nexport const AUTHOR_SELECTORS = [\n '.entry .entry-author',\n '.author.vcard .fn',\n '.author .vcard .fn',\n '.byline.vcard .fn',\n '.byline .vcard .fn',\n '.byline .by .author',\n '.byline .by',\n '.byline .author',\n '.post-author.vcard',\n '.post-author .vcard',\n 'a[rel=author]',\n '#by_author',\n '.by_author',\n '#entryAuthor',\n '.entryAuthor',\n '.byline a[href*=author]',\n '#author .authorname',\n '.author .authorname',\n '#author',\n '.author',\n '.articleauthor',\n '.ArticleAuthor',\n '.byline',\n];\n\n// An ordered list of Selectors to find likely article authors, with\n// regular expression for content.\nconst bylineRe = /^[\\n\\s]*By/i;\nexport const BYLINE_SELECTORS_RE = [\n ['#byline', bylineRe],\n ['.byline', bylineRe],\n];\n","import { cleanAuthor } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n AUTHOR_META_TAGS,\n AUTHOR_MAX_LENGTH,\n AUTHOR_SELECTORS,\n BYLINE_SELECTORS_RE,\n} from './constants';\n\nconst GenericAuthorExtractor = {\n extract({ $, metaCache }) {\n let author;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors looking for potential authors.\n author = extractFromSelectors($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression based selectors for\n // potential authors.\n for (const [selector, regex] of BYLINE_SELECTORS_RE) {\n const node = $(selector);\n if (node.length === 1) {\n const text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n\n return null;\n },\n};\n\nexport default GenericAuthorExtractor;\n","// An ordered list of meta tag names that denote\n// likely date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const DATE_PUBLISHED_META_TAGS = [\n 'article:published_time',\n 'displaydate',\n 'dc.date',\n 'dc.date.issued',\n 'rbpubdate',\n 'publish_date',\n 'pub_date',\n 'pagedate',\n 'pubdate',\n 'revision_date',\n 'doc_date',\n 'date_created',\n 'content_create_date',\n 'lastmodified',\n 'created',\n 'date',\n];\n\n// An ordered list of XPath Selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nexport const DATE_PUBLISHED_SELECTORS = [\n '.hentry .dtstamp.published',\n '.hentry .published',\n '.hentry .dtstamp.updated',\n '.hentry .updated',\n '.single .published',\n '.meta .published',\n '.meta .postDate',\n '.entry-date',\n '.byline .date',\n '.postmetadata .date',\n '.article_datetime',\n '.date-header',\n '.story-date',\n '.dateStamp',\n '#story .datetime',\n '.dateline',\n '.pubdate',\n];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. 
These should always have the first\n// reference be a date string that is parseable by dateutil.parser.parse\nconst abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nexport const DATE_PUBLISHED_URL_RES = [\n // /2012/01/27/ but not /2012/01/293\n new RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n // 20120127 or 20120127T but not 2012012733 or 8201201733\n // /[^0-9](20\\d{2}[01]\\d[0-3]\\d)([^0-9]|$)/i,\n // 2012-01-27\n new RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n // /2012/jan/27/\n new RegExp(`/(20\\\\d{2}/${abbrevMonthsStr}/[0-3]\\\\d)/`, 'i'),\n];\n","import { cleanDatePublished } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\nimport { extractFromUrl } from 'utils/text';\n\nimport {\n DATE_PUBLISHED_META_TAGS,\n DATE_PUBLISHED_SELECTORS,\n DATE_PUBLISHED_URL_RES,\n} from './constants';\n\nconst GenericDatePublishedExtractor = {\n extract({ $, url, metaCache }) {\n let datePublished;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors looking for potential\n // date_published's.\n datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a dately string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n },\n};\n\nexport default GenericDatePublishedExtractor;\n","// import {\n// DEK_META_TAGS,\n// DEK_SELECTORS,\n// DEK_URL_RES,\n// } from './constants';\n\n// import { cleanDek } from 'cleaners';\n\n// import {\n// extractFromMeta,\n// extractFromSelectors,\n// } from 'utils/dom';\n\n// Currently there is only one selector for\n// deks. We should simply return null here\n// until we have a more robust generic option.\n// Below is the original source for this, for reference.\nconst GenericDekExtractor = {\n // extract({ $, content, metaCache }) {\n extract() {\n return null;\n },\n};\n\nexport default GenericDekExtractor;\n\n// def extract_dek(self):\n// # First, check to see if we have a matching meta tag that we can make\n// # use of.\n// dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)\n// if not dek:\n// # Second, look through our CSS/XPath selectors. 
This may return\n// # an HTML fragment.\n// dek = self.extract_from_selectors('dek',\n// constants.DEK_SELECTORS,\n// text_only=False)\n//\n// if dek:\n// # Make sure our dek isn't in the first few thousand characters\n// # of the content, otherwise it's just the start of the article\n// # and not a true dek.\n// content = self.extract_content()\n// content_chunk = normalize_spaces(strip_tags(content[:2000]))\n// dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.\n//\n// # 80% or greater similarity means the dek was very similar to some\n// # of the starting content, so we skip it.\n// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:\n// return dek\n//\n// return None\n","// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const LEAD_IMAGE_URL_META_TAGS = [\n 'og:image',\n 'twitter:image',\n 'image_src',\n];\n\nexport const LEAD_IMAGE_URL_SELECTORS = [\n 'link[rel=image_src]',\n];\n\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS = [\n 'upload',\n 'wp-content',\n 'large',\n 'photo',\n 'wp-image',\n];\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS = [\n 'spacer',\n 'sprite',\n 'blank',\n 'throbber',\n 'gradient',\n 'tile',\n 'bg',\n 'background',\n 'icon',\n 'social',\n 'header',\n 'hdr',\n 'advert',\n 'spinner',\n 'loader',\n 'loading',\n 'default',\n 'rating',\n 'share',\n 'facebook',\n 'twitter',\n 'theme',\n 'promo',\n 'ads',\n 'wp-includes',\n];\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const GIF_RE = /\\.gif(\\?.*)?$/i;\nexport const JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n","import {\n POSITIVE_LEAD_IMAGE_URL_HINTS_RE,\n NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,\n GIF_RE,\n JPG_RE,\n} from './constants';\n\nimport { PHOTO_HINTS_RE } from '../content/scoring/constants';\n\nfunction getSig($node) {\n return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;\n}\n\n// Scores image urls based on a variety of heuristics.\nexport function scoreImageUrl(url) {\n url = url.trim();\n let score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nexport function scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nexport function scoreByParents($img) {\n let score = 0;\n const $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n const $parent = $img.parent();\n let $gParent;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach(($node) => {\n if (PHOTO_HINTS_RE.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// caption. 
Bonus if so.\nexport function scoreBySibling($img) {\n let score = 0;\n const $sibling = $img.next();\n const sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName.toLowerCase() === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nexport function scoreByDimensions($img) {\n let score = 0;\n\n const width = parseFloat($img.attr('width'));\n const height = parseFloat($img.attr('height'));\n const src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n const area = width * height;\n if (area < 5000) { // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nexport function scoreByPosition($imgs, index) {\n return ($imgs.length / 2) - index;\n}\n","import { extractFromMeta } from 'utils/dom';\nimport { cleanImage } from 'cleaners';\n\nimport {\n LEAD_IMAGE_URL_META_TAGS,\n LEAD_IMAGE_URL_SELECTORS,\n} from './constants';\n\nimport {\n scoreImageUrl,\n scoreAttr,\n scoreByParents,\n scoreBySibling,\n scoreByDimensions,\n scoreByPosition,\n} from './score-image';\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nconst GenericLeadImageUrlExtractor = {\n extract({ $, content, metaCache, html }) {\n let cleanUrl;\n if (!$.browser && $('head').length === 0) {\n $('*').first().prepend(html);\n }\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n const imageUrl =\n extractFromMeta(\n $,\n LEAD_IMAGE_URL_META_TAGS,\n metaCache,\n false\n );\n\n if (imageUrl) {\n cleanUrl = cleanImage(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n const $content = $(content);\n const imgs = $('img', $content).toArray();\n const imgScores = {};\n\n imgs.forEach((img, index) => {\n const $img = $(img);\n const src = $img.attr('src');\n\n if (!src) return;\n\n let score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n const [topUrl, topScore] =\n Reflect.ownKeys(imgScores).reduce((acc, key) =>\n imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc\n , [null, 0]);\n\n if (topScore > 0) {\n cleanUrl = cleanImage(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n for (const selector of LEAD_IMAGE_URL_SELECTORS) {\n const $node = $(selector).first();\n const src = $node.attr('src');\n if (src) {\n cleanUrl = cleanImage(src);\n if (cleanUrl) return cleanUrl;\n }\n\n const href = $node.attr('href');\n if (href) {\n cleanUrl = cleanImage(href);\n if (cleanUrl) return cleanUrl;\n }\n\n const value = $node.attr('value');\n if (value) {\n cleanUrl = cleanImage(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n\n return null;\n },\n};\n\nexport default GenericLeadImageUrlExtractor;\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
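// The reduce above is an argmax over the score table. A standalone sketch
// with hypothetical scores:
//
//   const imgScores = { '/a.jpg': 42, '/b.gif': -30, '/c.png': 17 };
//   const [topUrl, topScore] = Reflect.ownKeys(imgScores).reduce(
//     (acc, key) => (imgScores[key] > acc[1] ? [key, imgScores[key]] : acc),
//     [null, 0]
//   );
//   // topUrl === '/a.jpg', topScore === 42. The [null, 0] seed means an
//   // all-negative table leaves topScore at 0, so the probable-node
//   // selector fallback runs instead.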
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n","import difflib from 'difflib';\n\nexport default function scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. 
On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n const diffPercent = 1.0 - similarity;\n const diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n","import { IS_DIGIT_RE } from 'utils/text/constants';\n\nexport default function scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n let score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n const linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n","export default function scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n","export const DIGIT_RE = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
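// Plugging numbers into the modifier above, with the 0.2 constant the code
// actually uses (diffModifier = -(250 * (diffPercent - 0.2))):
//
//   similarity 0.97 -> diffPercent 0.03 -> -(250 * (0.03 - 0.2)) = +42.5
//   similarity 0.80 -> diffPercent 0.20 -> -(250 * (0.20 - 0.2)) =    0
//   similarity 0.70 -> diffPercent 0.30 -> -(250 * (0.30 - 0.2)) =  -25
//
// The "+17.5 / 0 / -25 at 3% / 10% / 20%" figures in the prose comment
// appear to describe an earlier 0.1 constant rather than the 0.2 in the
// code.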
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nexport const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n","import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { range } from 'utils';\nimport {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PAGE_RE,\n} from 'utils/dom/constants';\nimport { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nfunction makeSig($link) {\n return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreByParents($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n let $parent = $link.parent();\n let positiveMatch = false;\n let negativeMatch = false;\n let score = 0;\n\n Array.from(range(0, 4)).forEach(() => {\n if ($parent.length === 0) {\n return;\n }\n\n const parentData = makeSig($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. 
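// A quick sanity check of the link-text regexes above (hypothetical inputs):
//
//   NEXT_LINK_TEXT_RE.test('Next »')    // true
//   NEXT_LINK_TEXT_RE.test('»|')        // false: '»|' reads as "last page"
//   CAP_LINK_TEXT_RE.test('Last page')  // true
//   PREV_LINK_TEXT_RE.test('« Older')   // true
//   PAGE_RE.test('pagination')          // true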
Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)\n && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n","import { PREV_LINK_TEXT_RE } from '../constants';\n\nexport default function scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n DIGIT_RE,\n EXTRANEOUS_LINK_HINTS_RE,\n} from '../constants';\n\nexport default function shouldScore(\n href,\n articleUrl,\n baseUrl,\n parsedUrl,\n linkText,\n previousUrls\n) {\n // skip if we've already fetched this url\n if (previousUrls.find(url => href === url) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n const { hostname } = parsedUrl;\n const { hostname: linkHost } = URL.parse(href);\n\n // Domain mismatch.\n if (linkHost !== hostname) {\n return false;\n }\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n const fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n","export default function scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { NEXT_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n","import {\n NEXT_LINK_TEXT_RE,\n CAP_LINK_TEXT_RE,\n} from '../constants';\n\nexport default function scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. 
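// How the shouldScore() gate above plays out for a hypothetical article at
// http://example.com/story/2015/01/post (same-host base URL):
//
//   // '.../story/2015/01/post/2' -> true: same host, digit in the fragment
//   // 'http://other.com/post/2'  -> false: hostname mismatch
//   // '.../about'                -> false: no digit left after stripping
//   //                               the base URL
//   // any link whose text matches EXTRANEOUS_LINK_HINTS_RE ('comment',
//   // 'share', ...) or runs past 25 characters is likewise skipped.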
If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n getAttrs,\n isWordpress,\n} from 'utils/dom';\nimport {\n removeAnchor,\n pageNumFromUrl,\n} from 'utils/text';\n\nimport {\n scoreSimilarity,\n scoreLinkText,\n scorePageInLink,\n scoreExtraneousLinks,\n scoreByParents,\n scorePrevLink,\n shouldScore,\n scoreBaseUrl,\n scoreCapLinks,\n scoreNextLinkText,\n} from './utils';\n\nexport function makeBaseRegex(baseUrl) {\n return new RegExp(`^${baseUrl}`, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls = [],\n}) {\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n const baseRegex = makeBaseRegex(baseUrl);\n const isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n const scoredPages = links.reduce((possiblePages, link) => {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n const attrs = getAttrs(link);\n\n // if href is undefined, return\n if (!attrs.href) return possiblePages;\n\n const href = removeAnchor(attrs.href);\n const $link = $(link);\n const linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText,\n href,\n };\n } else {\n possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`;\n }\n\n const possiblePage = possiblePages[href];\n const linkData = makeSig($link, linkText);\n const pageNum = pageNumFromUrl(href);\n\n let score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return Reflect.ownKeys(scoredPages).length === 0 ? 
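// A hypothetical tally for a same-domain '.../article/2' candidate whose
// link text is '2' and whose class is 'next', showing how the heuristics
// above combine inside scoreLinks():
//
//   scoreBaseUrl         ->   0  (href starts with the base URL)
//   scoreNextLinkText    ->  +50 ('next' in the text/class/id signature)
//   scoreCapLinks        ->   0
//   scorePrevLink        ->   0
//   scoreByParents       ->  +25 (assuming a parent matches PAGE_RE)
//   scoreExtraneousLinks ->   0
//   scorePageInLink      ->  +50 (page number in href, not WordPress ?p=)
//   scoreLinkText        ->  +8  (max(0, 10 - 2))
//   scoreSimilarity      ->  applied last, on top of the running total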
null : scoredPages;\n}\n","import URL from 'url';\n\nimport {\n articleBaseUrl,\n removeAnchor,\n} from 'utils/text';\nimport scoreLinks from './scoring/score-links';\n\n// Looks for and returns next page url\n// for multi-page articles\nconst GenericNextPageUrlExtractor = {\n extract({ $, url, parsedUrl, previousUrls = [] }) {\n parsedUrl = parsedUrl || URL.parse(url);\n\n const articleUrl = removeAnchor(url);\n const baseUrl = articleBaseUrl(url, parsedUrl);\n\n const links = $('a[href]').toArray();\n\n const scoredLinks = scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls,\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) => {\n const scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n },\n};\n\nexport default GenericNextPageUrlExtractor;\n","export const CANONICAL_META_SELECTORS = [\n 'og:url',\n];\n","import URL from 'url';\nimport { extractFromMeta } from 'utils/dom';\n\nimport { CANONICAL_META_SELECTORS } from './constants';\n\nfunction parseDomain(url) {\n const parsedUrl = URL.parse(url);\n const { hostname } = parsedUrl;\n return hostname;\n}\n\nfunction result(url) {\n return {\n url,\n domain: parseDomain(url),\n };\n}\n\nconst GenericUrlExtractor = {\n extract({ $, url, metaCache }) {\n const $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n const href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n const metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n },\n\n};\n\nexport default GenericUrlExtractor;\n","export const EXCERPT_META_SELECTORS = [\n 'og:description',\n 'twitter:description',\n];\n","import ellipsize from 'ellipsize';\n\nimport {\n extractFromMeta,\n stripTags,\n} from 'utils/dom';\n\nimport { EXCERPT_META_SELECTORS } from './constants';\n\nexport function clean(content, $, maxLength = 200) {\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nconst GenericExcerptExtractor = {\n extract({ $, content, metaCache }) {\n const excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n const maxLength = 200;\n const shortContent = content.slice(0, maxLength * 5);\n return clean($(shortContent).text(), $, maxLength);\n },\n};\n\nexport default GenericExcerptExtractor;\n","import cheerio from 'cheerio';\n\nimport { normalizeSpaces } from 'utils/text';\n\nconst GenericWordCountExtractor = {\n extract({ content }) {\n const $ = cheerio.load(content);\n const $content = $('div').first();\n\n const text = normalizeSpaces($content.text());\n return text.split(/\\s/).length;\n },\n};\n\nexport default GenericWordCountExtractor;\n","import cheerio from 'cheerio';\nimport stringDirection from 'string-direction';\n\nimport GenericContentExtractor from './content/extractor';\nimport GenericTitleExtractor from './title/extractor';\nimport GenericAuthorExtractor from './author/extractor';\nimport GenericDatePublishedExtractor from 
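// The score >= 50 cutoff above means a candidate needs at least one strong
// positive signal (e.g. scoreNextLinkText's +50) left over after the
// penalties. A sketch of the final selection with hypothetical scores:
//
//   const scoredLinks = {
//     '/article/2': { score: 58, href: '/article/2' },
//     '/about': { score: -225, href: '/about' },
//   };
//   const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) =>
//     (scoredLinks[link].score > acc.score ? scoredLinks[link] : acc),
//     { score: -100 });
//   // topPage.score === 58 >= 50 -> return '/article/2'; under 50 -> null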
'./date-published/extractor';\nimport GenericDekExtractor from './dek/extractor';\nimport GenericLeadImageUrlExtractor from './lead-image-url/extractor';\nimport GenericNextPageUrlExtractor from './next-page-url/extractor';\nimport GenericUrlExtractor from './url/extractor';\nimport GenericExcerptExtractor from './excerpt/extractor';\nimport GenericWordCountExtractor from './word-count/extractor';\n\nconst GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: ({ title }) => stringDirection.getDirection(title),\n\n extract(options) {\n const { html, $ } = options;\n\n if (html && !$) {\n const loaded = cheerio.load(html);\n options.$ = loaded;\n }\n\n const title = this.title(options);\n const date_published = this.date_published(options);\n const author = this.author(options);\n const content = this.content({ ...options, title });\n const lead_image_url = this.lead_image_url({ ...options, content });\n const dek = this.dek({ ...options, content });\n const next_page_url = this.next_page_url(options);\n const excerpt = this.excerpt({ ...options, content });\n const word_count = this.word_count({ ...options, content });\n const direction = this.direction({ title });\n const { url, domain } = this.url_and_domain(options);\n\n return {\n title,\n author,\n date_published: date_published || null,\n dek,\n lead_image_url,\n content,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default GenericExtractor;\n","import {\n MediumExtractor,\n BloggerExtractor,\n} from './custom/';\n\nconst Detectors = {\n 'meta[name=\"al:ios:app_name\"][value=\"Medium\"]': MediumExtractor,\n 'meta[name=\"generator\"][value=\"blogger\"]': BloggerExtractor,\n};\n\nexport default function detectByHtml($) {\n const selector = Reflect.ownKeys(Detectors).find(s => $(s).length > 0);\n\n return Detectors[selector];\n}\n","import URL from 'url';\n\nimport Extractors from './all';\nimport GenericExtractor from './generic';\nimport detectByHtml from './detect-by-html';\n\nexport default function getExtractor(url, parsedUrl, $) {\n parsedUrl = parsedUrl || URL.parse(url);\n const { hostname } = parsedUrl;\n const baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] ||\n detectByHtml($) || GenericExtractor;\n}\n","import Cleaners from 'cleaners';\nimport { convertNodeTo } from 'utils/dom';\nimport GenericExtractor from './generic';\n\n// Remove elements by an array of selectors\nexport function cleanBySelectors($content, $, { clean }) {\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nexport function transformElements($content, $, { transforms }) {\n if (!transforms) return $content;\n\n Reflect.ownKeys(transforms).forEach((key) => {\n const $matches = $(key, $content);\n const value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each((index, 
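// Extractor lookup order, as implemented in getExtractor() above: exact
// hostname, then two-part base domain, then HTML fingerprinting, then the
// generic fallback. A sketch (hypothetical registry contents):
//
//   // url = 'https://www.example.com/2019/some-story'
//   // 1. Extractors['www.example.com']   exact hostname match
//   // 2. Extractors['example.com']       baseDomain match
//   // 3. detectByHtml($)                 Medium/Blogger meta-tag sniffing
//   // 4. GenericExtractor                domain '*'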
node) => {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each((index, node) => {\n const result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors, extractHtml) {\n return selectors.find((selector) => {\n if (Array.isArray(selector)) {\n if (extractHtml) {\n return selector.reduce((acc, s) => acc && $(s).length > 0, true);\n }\n\n const [s, attr] = selector;\n return $(s).length === 1 && $(s).attr(attr) && $(s).attr(attr).trim() !== '';\n }\n\n return $(selector).length === 1 && $(selector).text().trim() !== '';\n });\n}\n\nexport function select(opts) {\n const { $, type, extractionOpts, extractHtml = false } = opts;\n // Skip if there's not extraction for this type\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n const { selectors, defaultCleaner = true } = extractionOpts;\n\n const matchingSelector = findMatchingSelector($, selectors, extractHtml);\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n let $content;\n if (extractHtml) {\n // If matching selector is an array, we're considering this a\n // multi-match selection, which allows the parser to choose several\n // selectors to include in the result. 
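// The transforms map handled above accepts either a tag name or a function.
// A minimal custom-extractor sketch (the selectors are hypothetical):
//
//   transforms: {
//     // string: every match is converted to that tag
//     h1: 'h2',
//     // function: mutate the node in place, or return a tag name to
//     // convert the node to that tag
//     '.lazy-img': ($node) => {
//       $node.attr('src', $node.attr('data-src'));
//     },
//   }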
Note that all selectors in the\n // array must match in order for this selector to trigger\n if (Array.isArray(matchingSelector)) {\n $content = $(matchingSelector.join(','));\n const $wrapper = $('');\n $content.each((index, element) => {\n $wrapper.append(element);\n });\n\n $content = $wrapper;\n } else {\n $content = $(matchingSelector);\n }\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n $content = Cleaners[type]($content, { ...opts, defaultCleaner });\n\n return $.html($content);\n }\n\n let result;\n\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n const [selector, attr] = matchingSelector;\n result = $(selector).attr(attr).trim();\n } else {\n let $node = $(matchingSelector);\n\n $node = cleanBySelectors($node, $, extractionOpts);\n $node = transformElements($node, $, extractionOpts);\n\n result = $node.text().trim();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return Cleaners[type](result, { ...opts, ...extractionOpts });\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n const { type, extractor, fallback = true } = opts;\n\n const result = select({ ...opts, extractionOpts: extractor[type] });\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nconst RootExtractor = {\n extract(extractor = GenericExtractor, opts) {\n const { contentOnly, extractedTitle } = opts;\n // This is the generic extractor. 
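// Selector shapes understood by select()/findMatchingSelector above:
//
//   selectors: [
//     'div.article-body',              // plain: exactly one match with
//                                      // non-empty text
//     ['meta[name="date"]', 'value'],  // [selector, attr] pair: pull the
//                                      // attribute instead of the text
//     ['.hero', '.body'],              // array with extractHtml: every
//                                      // selector must match; matches are
//                                      // merged in document order
//   ]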
Run its extract method\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = {\n ...opts,\n extractor,\n };\n\n if (contentOnly) {\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title: extractedTitle,\n });\n return {\n content,\n };\n }\n const title = extractResult({ ...opts, type: 'title' });\n const date_published = extractResult({ ...opts, type: 'date_published' });\n const author = extractResult({ ...opts, type: 'author' });\n const next_page_url = extractResult({ ...opts, type: 'next_page_url' });\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title,\n });\n const lead_image_url = extractResult({ ...opts, type: 'lead_image_url', content });\n const excerpt = extractResult({ ...opts, type: 'excerpt', content });\n const dek = extractResult({ ...opts, type: 'dek', content, excerpt });\n const word_count = extractResult({ ...opts, type: 'word_count', content });\n const direction = extractResult({ ...opts, type: 'direction', title });\n const { url, domain } =\n extractResult({ ...opts, type: 'url_and_domain' }) || { url: null, domain: null };\n\n return {\n title,\n content,\n author,\n date_published,\n lead_image_url,\n dek,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default RootExtractor;\n","import { removeAnchor } from 'utils/text';\nimport RootExtractor from 'extractors/root-extractor';\nimport GenericExtractor from 'extractors/generic';\nimport Resource from 'resource';\n\nexport default async function collectAllPages(\n {\n next_page_url,\n html,\n $,\n metaCache,\n result,\n Extractor,\n title,\n url,\n }\n) {\n // At this point, we've fetched just the first page\n let pages = 1;\n const previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n while (next_page_url && pages < 26) {\n pages += 1;\n $ = await Resource.create(next_page_url);\n html = $.html();\n\n const extractorOpts = {\n url: next_page_url,\n html,\n $,\n metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls,\n };\n\n const nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n previousUrls.push(next_page_url);\n result = {\n ...result,\n content: `${result.content}
<hr><h4>Page ${pages}</h4>${nextPageResult.content}`,\n };\n\n next_page_url = nextPageResult.next_page_url;\n }\n\n const word_count = GenericExtractor.word_count({ content: `<div>${result.content}</div>
` });\n return {\n ...result,\n total_pages: pages,\n pages_rendered: pages,\n word_count,\n };\n}\n","import URL from 'url';\nimport cheerio from 'cheerio';\n\nimport Resource from 'resource';\nimport {\n validateUrl,\n Errors,\n} from 'utils';\nimport getExtractor from 'extractors/get-extractor';\nimport RootExtractor from 'extractors/root-extractor';\nimport collectAllPages from 'extractors/collect-all-pages';\n\nconst Mercury = {\n async parse(url, html, opts = {}) {\n const {\n fetchAllPages = true,\n fallback = true,\n } = opts;\n\n // if no url was passed and this is the browser version,\n // set url to window.location.href and load the html\n // from the current page\n if (!url && cheerio.browser) {\n url = window.location.href; // eslint-disable-line no-undef\n html = html || cheerio.html();\n }\n\n const parsedUrl = URL.parse(url);\n\n if (!validateUrl(parsedUrl)) {\n return Errors.badUrl;\n }\n\n const $ = await Resource.create(url, html, parsedUrl);\n\n const Extractor = getExtractor(url, parsedUrl, $);\n // console.log(`Using extractor for ${Extractor.domain}`);\n\n // If we found an error creating the resource, return that error\n if ($.failed) {\n return $;\n }\n\n // if html still has not been set (i.e., url passed to Mercury.parse),\n // set html from the response of Resource.create\n if (!html) {\n html = $.html();\n }\n\n // Cached value of every meta name in our document.\n // Used when extracting title/author/date_published/dek\n const metaCache = $('meta').map((_, node) => $(node).attr('name')).toArray();\n\n let result = RootExtractor.extract(\n Extractor,\n {\n url,\n html,\n $,\n metaCache,\n parsedUrl,\n fallback,\n });\n\n const { title, next_page_url } = result;\n\n // Fetch more pages if next_page_url found\n if (fetchAllPages && next_page_url) {\n result = await collectAllPages(\n {\n Extractor,\n next_page_url,\n html,\n $,\n metaCache,\n result,\n title,\n url,\n }\n );\n } else {\n result = {\n ...result,\n total_pages: 1,\n rendered_pages: 1,\n };\n }\n\n return result;\n },\n\n browser: !!cheerio.browser,\n\n // A convenience method for getting a resource\n // to work with, e.g., for custom extractor generator\n async fetchResource(url) {\n return await Resource.create(url);\n },\n\n};\n\nexport default 
Mercury;\n"],"names":["NORMALIZE_RE","normalizeSpaces","text","replace","trim","extractFromUrl","url","regexList","matchRe","find","re","test","exec","PAGE_IN_HREF_RE","RegExp","HAS_ALPHA_RE","IS_ALPHA_RE","IS_DIGIT_RE","ENCODING_RE","DEFAULT_ENCODING","pageNumFromUrl","matches","match","pageNum","parseInt","removeAnchor","split","isGoodSegment","segment","index","firstSegmentHasLetters","goodSegment","length","toLowerCase","articleBaseUrl","parsed","parsedUrl","URL","parse","protocol","host","path","cleanedSegments","reverse","reduce","acc","rawSegment","includes","possibleSegment","fileExt","push","join","SENTENCE_END_RE","hasSentenceEnd","excerptContent","content","words","slice","getEncoding","str","encoding","testEncode","iconv","encodingExists","range","start","end","validateUrl","hostname","Errors","REQUEST_HEADERS","cheerio","browser","FETCH_TIMEOUT","BAD_CONTENT_TYPES","BAD_CONTENT_TYPES_RE","MAX_CONTENT_LENGTH","get","options","resolve","reject","err","response","body","validateResponse","parseNon2xx","statusMessage","statusCode","Error","error","headers","contentType","contentLength","encodeURI","href","badUrl","fetchResource","convertMetaProp","$","from","to","each","_","node","$node","value","attr","removeAttr","normalizeMetaTags","SPACER_RE","KEEP_CLASS","KEEP_SELECTORS","STRIP_OUTPUT_TAGS","REMOVE_ATTRS","REMOVE_ATTR_SELECTORS","map","selector","REMOVE_ATTR_LIST","WHITELIST_ATTRS","WHITELIST_ATTRS_RE","REMOVE_EMPTY_TAGS","REMOVE_EMPTY_SELECTORS","tag","CLEAN_CONDITIONALLY_TAGS","HEADER_TAGS","HEADER_TAG_LIST","UNLIKELY_CANDIDATES_BLACKLIST","UNLIKELY_CANDIDATES_WHITELIST","DIV_TO_P_BLOCK_TAGS","POSITIVE_SCORE_HINTS","POSITIVE_SCORE_RE","NEGATIVE_SCORE_HINTS","NEGATIVE_SCORE_RE","IS_WP_SELECTOR","PAGE_RE","BLOCK_LEVEL_TAGS","BLOCK_LEVEL_TAGS_RE","candidatesBlacklist","CANDIDATES_BLACKLIST","candidatesWhitelist","CANDIDATES_WHITELIST","stripUnlikelyCandidates","not","classes","id","classAndId","remove","brsToPs","collapsing","element","$element","nextElement","next","tagName","paragraphize","br","sibling","nextSibling","p","appendTo","replaceWith","convertDivs","div","$div","convertable","children","convertSpans","span","$span","parents","convertToParagraphs","convertNodeTo","attrs","getAttrs","attribString","key","html","contents","cleanForHeight","$img","height","width","removeSpacers","cleanImages","$article","img","markToKeep","article","tags","addClass","stripJunkTags","cleanHOnes","$hOnes","removeAllButWhitelist","removeClass","cleanAttributes","parent","removeEmpty","$p","NON_TOP_CANDIDATE_TAGS","NON_TOP_CANDIDATE_TAGS_RE","HNEWS_CONTENT_SELECTORS","PHOTO_HINTS","PHOTO_HINTS_RE","READABILITY_ASSET","DIGIT_RE","BR_TAGS_RE","BR_TAG_RE","UNLIKELY_RE","PARAGRAPH_SCORE_TAGS","CHILD_CONTENT_TAGS","BAD_TAGS","HTML_OR_BODY_RE","getWeight","score","getScore","parseFloat","scoreCommas","idkRe","scoreLength","textLength","chunks","lengthBonus","Math","min","max","scoreParagraph","setScore","addScore","amount","getOrInitScore","e","addToParent","weightNodes","scoreNode","addScoreTo","scorePs","$parent","rawScore","scoreContent","forEach","parentSelector","childSelector","mergeSiblings","$candidate","topScore","siblingScoreThreshold","wrappingDiv","$sibling","siblingScore","append","contentBonus","density","linkDensity","newScore","siblingContent","siblingContentLength","first","findTopCandidate","removeUnlessContent","weight","hasClass","pCount","inputCount","imgCount","nodeIsList","previousNode","prev","scriptCount","cleanTags","cleanHeaders","title","header","$header","prevAll","r
ewriteTopLevel","absolutize","rootUrl","$content","absoluteUrl","makeLinksAbsolute","totalTextLength","linkText","linkLength","extractFromMeta","metaNames","cachedNames","foundNames","filter","indexOf","name","type","nodes","values","toArray","metaValue","stripTags","isGoodNode","maxChildren","withinComment","extractFromSelectors","selectors","textOnly","cleanText","commentParent","nodeClass","class","undefined","nodeIsSufficient","isWordpress","attribs","attributes","setAttr","val","setAttribute","setAttrs","removeAttribute","IS_LINK","IS_IMAGE","TAGS_TO_REMOVE","convertLazyLoadedImages","isComment","cleanComments","root","clean","Resource","preparedResponse","validResponse","result","failed","generateDoc","encodeDoc","decodedContent","decode","load","metaContentType","properEncoding","merge","extractor","domains","domain","mergeSupportedDomains","supportedDomains","BloggerExtractor","NYMagExtractor","$children","WikipediaExtractor","prepend","TwitterExtractor","tweets","$tweetContainer","NYTimesExtractor","src","TheAtlanticExtractor","NewYorkerExtractor","WiredExtractor","MSNExtractor","YahooExtractor","BuzzfeedExtractor","has","WikiaExtractor","LittleThingsExtractor","PoliticoExtractor","DeadspinExtractor","youtubeId","BroadwayWorldExtractor","ApartmentTherapyExtractor","data","JSON","sources","MediumExtractor","ytRe","thumb","decodeURIComponent","$caption","empty","WwwTmzComExtractor","WwwWashingtonpostComExtractor","WwwHuffingtonpostComExtractor","NewrepublicComExtractor","MoneyCnnComExtractor","WwwThevergeComExtractor","WwwCnnComExtractor","$text","WwwAolComExtractor","WwwYoutubeComExtractor","videoId","WwwTheguardianComExtractor","WwwSbnationComExtractor","WwwBloombergComExtractor","WwwBustleComExtractor","WwwNprOrgExtractor","WwwRecodeNetExtractor","QzComExtractor","WwwDmagazineComExtractor","WwwReutersComExtractor","MashableComExtractor","WwwChicagotribuneComExtractor","WwwVoxComExtractor","imgHtml","NewsNationalgeographicComExtractor","$imgSrc","WwwNationalgeographicComExtractor","$imageParent","$dataAttrContainer","imgPath1","imgPath2","WwwLatimesComExtractor","$figure","PagesixComExtractor","ThefederalistpapersOrgExtractor","WwwCbssportsComExtractor","WwwMsnbcComExtractor","lead_image_url","WwwThepoliticalinsiderComExtractor","WwwMentalflossComExtractor","AbcnewsGoComExtractor","WwwNydailynewsComExtractor","WwwCnbcComExtractor","WwwPopsugarComExtractor","ObserverComExtractor","PeopleComExtractor","WwwUsmagazineComExtractor","WwwRollingstoneComExtractor","twofortysevensportsComExtractor","UproxxComExtractor","WwwEonlineComExtractor","WwwMiamiheraldComExtractor","WwwRefinery29ComExtractor","WwwMacrumorsComExtractor","WwwAndroidcentralComExtractor","WwwSiComExtractor","WwwRawstoryComExtractor","WwwCnetComExtractor","WwwCinemablendComExtractor","WwwTodayComExtractor","WwwHowtogeekComExtractor","WwwAlComExtractor","WwwThepennyhoarderComExtractor","WwwWesternjournalismComExtractor","FusionNetExtractor","WwwAmericanowComExtractor","ScienceflyComExtractor","HellogigglesComExtractor","ThoughtcatalogComExtractor","WwwNjComExtractor","WwwInquisitrComExtractor","WwwNbcnewsComExtractor","FortuneComExtractor","WwwLinkedinComExtractor","ObamawhitehouseArchivesGovExtractor","WwwOpposingviewsComExtractor","WwwProspectmagazineCoUkExtractor","ForwardComExtractor","WwwQdailyComExtractor","GothamistComExtractor","WwwFoolComExtractor","WwwSlateComExtractor","IciRadioCanadaCaExtractor","CustomExtractors","CLEAN_AUTHOR_RE","TEXT_LINK_RE","MS_DATE_STRING","SEC_DATE_STRING","CLEAN_DATE_STRING_RE","TIME_M
ERIDIAN_SPACE_RE","TIME_MERIDIAN_DOTS_RE","months","allMonths","timestamp1","timestamp2","timestamp3","SPLIT_DATE_STRING","TIME_WITH_OFFSET_RE","TITLE_SPLITTERS_RE","DOMAIN_ENDINGS_RE","cleanAuthor","author","leadImageUrl","validUrl","isWebUri","cleanDek","dek","excerpt","dekText","cleanDateString","dateString","createDate","timezone","format","moment","Date","tz","parseFormat","cleanDatePublished","toISOString","date","isValid","extractCleanNode","cleanConditionally","defaultCleaner","cleanTitle","resolveSplitTitle","h1","extractBreadcrumbTitle","splitTitle","termCounts","titleText","maxTerm","termCount","splitEnds","longestEnd","cleanDomainFromTitle","nakedDomain","startSlug","startSlugRatio","wuzzy","levenshtein","endSlug","endSlugRatio","newTitle","Cleaners","cleanImage","cleanContent","extractBestNode","opts","$topCandidate","GenericContentExtractor","defaultOpts","getContentNode","cleanAndReturnNode","k","STRONG_TITLE_META_TAGS","WEAK_TITLE_META_TAGS","STRONG_TITLE_SELECTORS","WEAK_TITLE_SELECTORS","GenericTitleExtractor","metaCache","AUTHOR_META_TAGS","AUTHOR_MAX_LENGTH","AUTHOR_SELECTORS","bylineRe","BYLINE_SELECTORS_RE","GenericAuthorExtractor","regex","DATE_PUBLISHED_META_TAGS","DATE_PUBLISHED_SELECTORS","abbrevMonthsStr","DATE_PUBLISHED_URL_RES","GenericDatePublishedExtractor","datePublished","GenericDekExtractor","LEAD_IMAGE_URL_META_TAGS","LEAD_IMAGE_URL_SELECTORS","POSITIVE_LEAD_IMAGE_URL_HINTS","POSITIVE_LEAD_IMAGE_URL_HINTS_RE","NEGATIVE_LEAD_IMAGE_URL_HINTS","NEGATIVE_LEAD_IMAGE_URL_HINTS_RE","GIF_RE","JPG_RE","getSig","scoreImageUrl","scoreAttr","scoreByParents","$figParent","$gParent","scoreBySibling","scoreByDimensions","area","round","scoreByPosition","$imgs","GenericLeadImageUrlExtractor","cleanUrl","imageUrl","imgs","imgScores","topUrl","scoreSimilarity","articleUrl","similarity","difflib","SequenceMatcher","ratio","diffPercent","diffModifier","scoreLinkText","linkTextAsNum","scorePageInLink","isWp","EXTRANEOUS_LINK_HINTS","EXTRANEOUS_LINK_HINTS_RE","NEXT_LINK_TEXT_RE","CAP_LINK_TEXT_RE","PREV_LINK_TEXT_RE","scoreExtraneousLinks","makeSig","$link","positiveMatch","negativeMatch","parentData","scorePrevLink","linkData","shouldScore","baseUrl","previousUrls","linkHost","fragment","scoreBaseUrl","baseRegex","scoreNextLinkText","scoreCapLinks","makeBaseRegex","scoreLinks","links","scoredPages","possiblePages","link","possiblePage","GenericNextPageUrlExtractor","scoredLinks","topPage","scoredLink","CANONICAL_META_SELECTORS","parseDomain","GenericUrlExtractor","$canonical","metaUrl","EXCERPT_META_SELECTORS","maxLength","ellipsize","ellipse","GenericExcerptExtractor","shortContent","GenericWordCountExtractor","GenericExtractor","extract","bind","stringDirection","getDirection","loaded","date_published","next_page_url","word_count","direction","url_and_domain","Detectors","detectByHtml","s","getExtractor","baseDomain","Extractors","cleanBySelectors","transformElements","transforms","$matches","findMatchingSelector","extractHtml","Array","isArray","select","extractionOpts","matchingSelector","$wrapper","wrap","extractResult","fallback","RootExtractor","contentOnly","extractedTitle","Extractor","pages","create","extractorOpts","nextPageResult","collectAllPages","Mercury","fetchAllPages","window","location"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,IAAMA,eAAe,SAArB;;AAEA,AAAe,SAASC,eAAT,CAAyBC,IAAzB,EAA+B;SACrCA,KAAKC,OAAL,CAAaH,YAAb,EAA2B,GAA3B,EAAgCI,IAAhC,EAAP;;;ACHF;;;;;AAKA,AAAe,SAASC,cAAT,CAAwBC,GAAxB,EAA6BC,SAA7B,EAAwC;MAC/CC,UAAUD,UAAUE,IAAV,CAAe;WAAMC,GAAGC,IAAH,CAA
QL,GAAR,CAAN;GAAf,CAAhB;MACIE,OAAJ,EAAa;WACJA,QAAQI,IAAR,CAAaN,GAAb,EAAkB,CAAlB,CAAP;;;SAGK,IAAP;;;ACXF;;;;;;;;;;;;;;;;AAgBA,AAAO,IAAMO,kBAAkB,IAAIC,MAAJ,CAAW,0EAAX,EAAuF,GAAvF,CAAxB;;AAEP,AAAO,IAAMC,eAAe,QAArB;;AAEP,AAAO,IAAMC,cAAc,WAApB;AACP,AAAO,IAAMC,cAAc,WAApB;;AAEP,AAAO,IAAMC,cAAc,oBAApB;AACP,AAAO,IAAMC,mBAAmB,OAAzB;;ACtBQ,SAASC,cAAT,CAAwBd,GAAxB,EAA6B;MACpCe,UAAUf,IAAIgB,KAAJ,CAAUT,eAAV,CAAhB;MACI,CAACQ,OAAL,EAAc,OAAO,IAAP;;MAERE,UAAUC,SAASH,QAAQ,CAAR,CAAT,EAAqB,EAArB,CAAhB;;;;SAIOE,UAAU,GAAV,GAAgBA,OAAhB,GAA0B,IAAjC;;;ACVa,SAASE,YAAT,CAAsBnB,GAAtB,EAA2B;SACjCA,IAAIoB,KAAJ,CAAU,GAAV,EAAe,CAAf,EAAkBvB,OAAlB,CAA0B,KAA1B,EAAiC,EAAjC,CAAP;;;ACQF,SAASwB,aAAT,CAAuBC,OAAvB,EAAgCC,KAAhC,EAAuCC,sBAAvC,EAA+D;MACzDC,cAAc,IAAlB;;;;MAIIF,QAAQ,CAAR,IAAaZ,YAAYN,IAAZ,CAAiBiB,OAAjB,CAAb,IAA0CA,QAAQI,MAAR,GAAiB,CAA/D,EAAkE;kBAClD,IAAd;;;;;MAKEH,UAAU,CAAV,IAAeD,QAAQK,WAAR,OAA0B,OAA7C,EAAsD;kBACtC,KAAd;;;;;MAKEJ,QAAQ,CAAR,IAAaD,QAAQI,MAAR,GAAiB,CAA9B,IAAmC,CAACF,sBAAxC,EAAgE;kBAChD,KAAd;;;SAGKC,WAAP;;;;;;AAMF,AAAe,SAASG,cAAT,CAAwB5B,GAAxB,EAA6B6B,MAA7B,EAAqC;MAC5CC,YAAYD,UAAUE,IAAIC,KAAJ,CAAUhC,GAAV,CAA5B;MACQiC,QAF0C,GAEjBH,SAFiB,CAE1CG,QAF0C;MAEhCC,IAFgC,GAEjBJ,SAFiB,CAEhCI,IAFgC;MAE1BC,IAF0B,GAEjBL,SAFiB,CAE1BK,IAF0B;;;MAI9CX,yBAAyB,KAA7B;MACMY,kBAAkBD,KAAKf,KAAL,CAAW,GAAX,EACvBiB,OADuB,GAEvBC,MAFuB,CAEhB,UAACC,GAAD,EAAMC,UAAN,EAAkBjB,KAAlB,EAA4B;QAC9BD,UAAUkB,UAAd;;;QAGIlB,QAAQmB,QAAR,CAAiB,GAAjB,CAAJ,EAA2B;2BACUnB,QAAQF,KAAR,CAAc,GAAd,CADV;;UAClBsB,eADkB;UACDC,OADC;;UAErBjC,YAAYL,IAAZ,CAAiBsC,OAAjB,CAAJ,EAA+B;kBACnBD,eAAV;;;;;;QAMAnC,gBAAgBF,IAAhB,CAAqBiB,OAArB,KAAiCC,QAAQ,CAA7C,EAAgD;gBACpCD,QAAQzB,OAAR,CAAgBU,eAAhB,EAAiC,EAAjC,CAAV;;;;;;;QAOEgB,UAAU,CAAd,EAAiB;+BACUd,aAAaJ,IAAb,CAAkBiB,OAAlB,CAAzB;;;;QAIED,cAAcC,OAAd,EAAuBC,KAAvB,EAA8BC,sBAA9B,CAAJ,EAA2D;UACrDoB,IAAJ,CAAStB,OAAT;;;WAGKiB,GAAP;GAhCsB,EAiCrB,EAjCqB,CAAxB;;SAmCUN,QAAV,UAAuBC,IAAvB,GAA8BE,gBAAgBC,OAAhB,GAA0BQ,IAA1B,CAA+B,GAA/B,CAA9B;;;AC5EF;;AAEA,IAAMC,kBAAkB,IAAItC,MAAJ,CAAW,QAAX,CAAxB;AACA,AAAe,SAASuC,cAAT,CAAwBnD,IAAxB,EAA8B;SACpCkD,gBAAgBzC,IAAhB,CAAqBT,IAArB,CAAP;;;ACJa,SAASoD,cAAT,CAAwBC,OAAxB,EAA6C;kBAAZC,KAAY,uEAAJ,EAAI;;qBACnDD,QAAQnD,IAAR,GACQsB,KADR,CACc,KADd,EAEQ+B,KAFR,CAEc,CAFd,EAEiBD,KAFjB,EAGQL,IAHR,CAGa,GAHb,CAAP;;;ACEF;;;AAGA,AAAe,SAASO,WAAT,CAAqBC,GAArB,EAA0B;MACnCC,WAAWzC,gBAAf;MACID,YAAYP,IAAZ,CAAiBgD,GAAjB,CAAJ,EAA2B;QACnBE,aAAa3C,YAAYN,IAAZ,CAAiB+C,GAAjB,EAAsB,CAAtB,CAAnB;QACIG,MAAMC,cAAN,CAAqBF,UAArB,CAAJ,EAAsC;iBACzBA,UAAX;;;SAGGD,QAAP;;;eCduBI;;AAAzB,AAAe,SAAUA,KAAV;MAAgBC,KAAhB,uEAAwB,CAAxB;MAA2BC,GAA3B,uEAAiC,CAAjC;;;;;gBACND,SAASC,GADH;;;;;;iBAELD,SAAS,CAFJ;;;;;;;;;;;;;;ACAf;AACA,AAAe,SAASE,WAAT,OAAmC;MAAZC,QAAY,QAAZA,QAAY;;;SAEzC,CAAC,CAACA,QAAT;;;ACHF,IAAMC,SAAS;UACL;WACC,IADD;cAEI;;CAHd,CAOA;;ACLA;AACA,AAAO,IAAMC,kBAAkBC,QAAQC,OAAR,GAAkB,EAAlB,GAAuB;gBACtC;CADT;;;AAKP,AAAO,IAAMC,gBAAgB,KAAtB;;;AAGP,IAAMC,oBAAoB,CACxB,YADwB,EAExB,WAFwB,EAGxB,YAHwB,EAIxB,WAJwB,CAA1B;;AAOA,AAAO,IAAMC,uBAAuB,IAAI7D,MAAJ,QAAgB4D,kBAAkBvB,IAAlB,CAAuB,GAAvB,CAAhB,SAAiD,GAAjD,CAA7B;;;;AAIP,AAAO,IAAMyB,qBAAqB,OAA3B;;;;qCAKP,AAAO,AACP,AAAO,AAKP,AAAO;;ACtBP,SAASC,GAAT,CAAaC,OAAb,EAAsB;SACb,aAAY,UAACC,OAAD,EAAUC,MAAV,EAAqB;YAC9BF,OAAR,EAAiB,UAACG,GAAD,EAAMC,QAAN,EAAgBC,IAAhB,EAAyB;UACpCF,GAAJ,EAAS;eACAA,GAAP;OADF,MAEO;gBACG,EAAEE,UAAF,EAAQD,kBAAR,EAAR;;KAJJ;GADK,CAAP;;;;;;;;AAgBF,AAAO,SAASE,gBAAT,CAA0BF,QAA1B,EAAyD;MAArBG,WAAqB,uEAAP,KAAO;;;;;;;;MAQ3DH,SAASI,aAAT,IAA0BJ,SAASI,aAAT,KAA2B,IAAtD,IACEJ,SAASK,UAAT,KAAwB,GAF5B,EAGE;QACI,CAACL,SAASK,UAAd,EAA0B;YAClB,IAAIC,KAAJ,sDAC+CN,SAASO,KADxD,CAAN;KADF,MAIO,IAAI,CAACJ,WAAL,EAAkB;YACj
B,IAAIG,KAAJ,kDAC2CN,SAASK,UADpD,wEAAN;;;;0BASAL,SAASQ,OAzBiD;MAuB5CC,WAvB4C,qBAuB5D,cAvB4D;MAwB1CC,aAxB0C,qBAwB5D,gBAxB4D;;;;MA4B1DjB,qBAAqBhE,IAArB,CAA0BgF,WAA1B,CAAJ,EAA4C;UACpC,IAAIH,KAAJ,yCACkCG,WADlC,0BAAN;;;;MAMEC,gBAAgBhB,kBAApB,EAAwC;UAChC,IAAIY,KAAJ,yEACkEZ,kBADlE,OAAN;;;SAKK,IAAP;;;;;AAKF,AAAO;;;;;;;;AAUP;yDAAe,iBAA6BtE,GAA7B,EAAkC8B,SAAlC;;;;;;;wBACDA,aAAaC,IAAIC,KAAJ,CAAUuD,UAAUvF,GAAV,CAAV,CAAzB;;mBADa,GAGG;mBACT8B,UAAU0D,IADD;oCAEAxB,eAAd,CAFc;uBAGLG,aAHK;;mBAKT,IALS;;;wBAQJ,IARI;;oBAUR,IAVQ;;kCAYM;aAfT;;mBAkBoBI,IAAIC,OAAJ,CAlBpB;;;;oBAAA,SAkBLI,QAlBK;gBAAA,SAkBKC,IAlBL;;;6BAqBMD,QAAjB;6CACO;wBAAA;;aAtBI;;;;;6CA2BJb,OAAO0B,MA3BH;;;;;;;;GAAf;;WAA8BC,aAA9B;;;;SAA8BA,aAA9B;;;ACpFA,SAASC,eAAT,CAAyBC,CAAzB,EAA4BC,IAA5B,EAAkCC,EAAlC,EAAsC;cAC1BD,IAAV,QAAmBE,IAAnB,CAAwB,UAACC,CAAD,EAAIC,IAAJ,EAAa;QAC7BC,QAAQN,EAAEK,IAAF,CAAd;;QAEME,QAAQD,MAAME,IAAN,CAAWP,IAAX,CAAd;UACMO,IAAN,CAAWN,EAAX,EAAeK,KAAf;UACME,UAAN,CAAiBR,IAAjB;GALF;;SAQOD,CAAP;;;;;;;;;;AAUF,AAAe,SAASU,iBAAT,CAA2BV,CAA3B,EAA8B;MACvCD,gBAAgBC,CAAhB,EAAmB,SAAnB,EAA8B,OAA9B,CAAJ;MACID,gBAAgBC,CAAhB,EAAmB,UAAnB,EAA+B,MAA/B,CAAJ;SACOA,CAAP;;;ACtBF;AACA,AAAO,IAAMW,YAAY,IAAI/F,MAAJ,CAAW,0BAAX,EAAuC,GAAvC,CAAlB;;;;AAIP,AAAO,IAAMgG,aAAa,qBAAnB;;AAEP,AAAO,IAAMC,iBAAiB,CAC5B,wCAD4B,EAE5B,iDAF4B,EAG5B,uCAH4B,EAI5B,qCAJ4B,EAK5B,oCAL4B,CAAvB;;;AASP,AAAO,IAAMC,oBAAoB,CAC/B,OAD+B,EAE/B,QAF+B,EAG/B,UAH+B,EAI/B,MAJ+B,EAK/B,OAL+B,EAM/B,IAN+B,EAO/B,OAP+B,EAQ/B,QAR+B,EAS/B,QAT+B,CAA1B;;;AAaP,AAAO,IAAMC,eAAe,CAAC,OAAD,EAAU,OAAV,CAArB;AACP,AAAO,IAAMC,wBAAwBD,aAAaE,GAAb,CAAiB;eAAgBC,QAAhB;CAAjB,CAA9B;AACP,AAAO,IAAMC,mBAAmBJ,aAAa9D,IAAb,CAAkB,GAAlB,CAAzB;AACP,AAAO,IAAMmE,kBAAkB,CAC7B,KAD6B,EAE7B,QAF6B,EAG7B,MAH6B,EAI7B,OAJ6B,EAK7B,IAL6B,EAM7B,KAN6B,EAO7B,YAP6B,EAQ7B,OAR6B,EAS7B,QAT6B,CAAxB;;AAYP,AAAO,IAAMC,qBAAqB,IAAIzG,MAAJ,QAAgBwG,gBAAgBnE,IAAhB,CAAqB,GAArB,CAAhB,SAA+C,GAA/C,CAA3B;;;AAGP,AAAO,IAAMqE,oBAAoB,CAAC,GAAD,CAA1B;AACP,AAAO,IAAMC,yBAAyBD,kBAAkBL,GAAlB,CAAsB;SAAUO,GAAV;CAAtB,EAA6CvE,IAA7C,CAAkD,GAAlD,CAA/B;;;AAGP,AAAO,IAAMwE,2BAA2B,CAAC,IAAD,EAAO,IAAP,EAAa,OAAb,EAAsB,KAAtB,EAA6B,QAA7B,EAAuC,MAAvC,EAA+CxE,IAA/C,CAAoD,GAApD,CAAjC;;;AAGP,IAAMyE,cAAc,CAAC,IAAD,EAAO,IAAP,EAAa,IAAb,EAAmB,IAAnB,EAAyB,IAAzB,CAApB;AACA,AAAO,IAAMC,kBAAkBD,YAAYzE,IAAZ,CAAiB,GAAjB,CAAxB;;;;;;;;AAQP,AAAO,IAAM2E,gCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C;;AAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,UAvB2C,EAwB3C,OAxB2C,EAyB3C,YAzB2C,EA0B3C,UA1B2C;AA2B3C,2BA3B2C;AA4B3C,OA5B2C,EA6B3C,eA7B2C,EA8B3C,SA9B2C,EA+B3C,QA/B2C,EAgC3C,QAhC2C,EAiC3C,KAjC2C,EAkC3C,OAlC2C,EAmC3C,UAnC2C,EAoC3C,SApC2C,EAqC3C,UArC2C,EAsC3C,SAtC2C,EAuC3C,SAvC2C,EAwC3C,OAxC2C,CAAtC;;;;;;;;;;;;;AAsDP,AAAO,IAAMC,gCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,sBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjC7E,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO;;AAeP,AAAO;;;;;AAMP,AAAO;;AASP,AAAO;AAMP,AAAO;;;;;;AAMP,AAAO,IAAM8E,uBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,oBAAoB,IAAIpH,M
AAJ,CAAWmH,qBAAqB9E,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO;;;;;;AAMP,AAAO,IAAMgF,uBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,oBAAoB,IAAItH,MAAJ,CAAWqH,qBAAqBhF,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAMkF,iBAAiB,wCAAvB;;;AAGP,AAAO;;;;AAIP,AAAO;AAgBP,AAAO;;;AAGP,AAAO,IAAMC,UAAU,IAAIxH,MAAJ,CAAW,iBAAX,EAA8B,GAA9B,CAAhB;;;;;;AAMP,AAAO;;;;AAIP,AAAO;;;;AAIP,AAAO;;;AAGP,AAAO;;;AAGP,AAAO;;;;AAIP,AAAO,IAAMyH,mBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,EAuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMC,sBAAsB,IAAI1H,MAAJ,QAAgByH,iBAAiBpF,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAMP,IAAMsF,sBAAsBX,8BAA8B3E,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMuF,uBAAuB,IAAI5H,MAAJ,CAAW2H,mBAAX,EAAgC,GAAhC,CAA7B;;AAEP,IAAME,sBAAsBZ,8BAA8B5E,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMyF,uBAAuB,IAAI9H,MAAJ,CAAW6H,mBAAX,EAAgC,GAAhC,CAA7B,CAEP,AAAO,AAEP,AAAO,AACP,AAAO,AACP,AAAO,AAEP,AAAO;;AC9YQ,SAASE,uBAAT,CAAiC3C,CAAjC,EAAoC;;;;;;;;;;IAU/C,GAAF,EAAO4C,GAAP,CAAW,GAAX,EAAgBzC,IAAhB,CAAqB,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;QAC9BC,QAAQN,EAAEK,IAAF,CAAd;QACMwC,UAAUvC,MAAME,IAAN,CAAW,OAAX,CAAhB;QACMsC,KAAKxC,MAAME,IAAN,CAAW,IAAX,CAAX;QACI,CAACsC,EAAD,IAAO,CAACD,OAAZ,EAAqB;;QAEfE,cAAgBF,WAAW,EAA3B,WAAiCC,MAAM,EAAvC,CAAN;QACIJ,qBAAqBjI,IAArB,CAA0BsI,UAA1B,CAAJ,EAA2C;;KAA3C,MAEO,IAAIP,qBAAqB/H,IAArB,CAA0BsI,UAA1B,CAAJ,EAA2C;YAC1CC,MAAN;;GAVJ;;SAcOhD,CAAP;;;AC3BF;;;;;;;;;AASA,AAAe,SAASiD,UAAT,CAAiBjD,CAAjB,EAAoB;MAC7BkD,aAAa,KAAjB;IACE,IAAF,EAAQ/C,IAAR,CAAa,UAACxE,KAAD,EAAQwH,OAAR,EAAoB;QACzBC,WAAWpD,EAAEmD,OAAF,CAAjB;QACME,cAAcD,SAASE,IAAT,GAAgB3E,GAAhB,CAAoB,CAApB,CAApB;;QAEI0E,eAAeA,YAAYE,OAAZ,CAAoBxH,WAApB,OAAsC,IAAzD,EAA+D;mBAChD,IAAb;eACSiH,MAAT;KAFF,MAGO,IAAIE,UAAJ,EAAgB;mBACR,KAAb;;mBAEaC,OAAb,EAAsBnD,CAAtB,EAAyB,IAAzB;;GAVJ;;SAcOA,CAAP;;;ACzBF;;;;;;;;;;;AAWA,AAAe,SAASwD,YAAT,CAAsBnD,IAAtB,EAA4BL,CAA5B,EAA2C;MAAZyD,EAAY,uEAAP,KAAO;;MAClDnD,QAAQN,EAAEK,IAAF,CAAd;;MAEIoD,EAAJ,EAAQ;QACFC,UAAUrD,KAAKsD,WAAnB;QACMC,IAAI5D,EAAE,SAAF,CAAV;;;;WAIO0D,WAAW,EAAEA,QAAQH,OAAR,IAAmBjB,oBAAoB7H,IAApB,CAAyBiJ,QAAQH,OAAjC,CAArB,CAAlB,EAAmF;UAC3EI,cAAcD,QAAQC,WAA5B;QACED,OAAF,EAAWG,QAAX,CAAoBD,CAApB;gBACUD,WAAV;;;UAGIG,WAAN,CAAkBF,CAAlB;UACMZ,MAAN;WACOhD,CAAP;;;SAGKA,CAAP;;;AC7BF,SAAS+D,WAAT,CAAqB/D,CAArB,EAAwB;IACpB,KAAF,EAASG,IAAT,CAAc,UAACxE,KAA
D,EAAQqI,GAAR,EAAgB;QACtBC,OAAOjE,EAAEgE,GAAF,CAAb;QACME,cAAcD,KAAKE,QAAL,CAAcrC,mBAAd,EAAmChG,MAAnC,KAA8C,CAAlE;;QAEIoI,WAAJ,EAAiB;uBACDD,IAAd,EAAoBjE,CAApB,EAAuB,GAAvB;;GALJ;;SASOA,CAAP;;;AAGF,SAASoE,YAAT,CAAsBpE,CAAtB,EAAyB;IACrB,MAAF,EAAUG,IAAV,CAAe,UAACxE,KAAD,EAAQ0I,IAAR,EAAiB;QACxBC,QAAQtE,EAAEqE,IAAF,CAAd;QACMH,cAAcI,MAAMC,OAAN,CAAc,QAAd,EAAwBzI,MAAxB,KAAmC,CAAvD;QACIoI,WAAJ,EAAiB;uBACDI,KAAd,EAAqBtE,CAArB,EAAwB,GAAxB;;GAJJ;;SAQOA,CAAP;;;;;;;;;;;;;;;AAeF,AAAe,SAASwE,sBAAT,CAA6BxE,CAA7B,EAAgC;MACzCiD,WAAQjD,CAAR,CAAJ;MACI+D,YAAY/D,CAAZ,CAAJ;MACIoE,aAAapE,CAAb,CAAJ;;SAEOA,CAAP;;;AC5Ca,SAASyE,gBAAT,CAAuBnE,KAAvB,EAA8BN,CAA9B,EAA4C;MAAXwB,GAAW,uEAAL,GAAK;;MACnDnB,OAAOC,MAAM3B,GAAN,CAAU,CAAV,CAAb;MACI,CAAC0B,IAAL,EAAW;WACFL,CAAP;;MAEI0E,QAAQC,SAAStE,IAAT,KAAkB,EAAhC;;;MAGMuE,eAAe,iBAAgBF,KAAhB,EACQzD,GADR,CACY;WAAU4D,GAAV,SAAiBH,MAAMG,GAAN,CAAjB;GADZ,EAEQ5H,IAFR,CAEa,GAFb,CAArB;MAGI6H,aAAJ;;MAEI9E,EAAE1B,OAAN,EAAe;;;;WAIN+B,KAAKkD,OAAL,CAAaxH,WAAb,OAA+B,UAA/B,GAA4CuE,MAAMtG,IAAN,EAA5C,GAA2DsG,MAAMwE,IAAN,EAAlE;GAJF,MAKO;WACExE,MAAMyE,QAAN,EAAP;;QAEIjB,WAAN,OACMtC,GADN,SACaoD,YADb,SAC6BE,IAD7B,UACsCtD,GADtC;SAGOxB,CAAP;;;ACxBF,SAASgF,cAAT,CAAwBC,IAAxB,EAA8BjF,CAA9B,EAAiC;MACzBkF,SAAS5J,SAAS2J,KAAKzE,IAAL,CAAU,QAAV,CAAT,EAA8B,EAA9B,CAAf;MACM2E,QAAQ7J,SAAS2J,KAAKzE,IAAL,CAAU,OAAV,CAAT,EAA6B,EAA7B,KAAoC,EAAlD;;;;;MAKI,CAAC0E,UAAU,EAAX,IAAiB,EAAjB,IAAuBC,QAAQ,EAAnC,EAAuC;SAChCnC,MAAL;GADF,MAEO,IAAIkC,MAAJ,EAAY;;;;SAIZzE,UAAL,CAAgB,QAAhB;;;SAGKT,CAAP;;;;;AAKF,SAASoF,aAAT,CAAuBH,IAAvB,EAA6BjF,CAA7B,EAAgC;MAC1BW,UAAUlG,IAAV,CAAewK,KAAKzE,IAAL,CAAU,KAAV,CAAf,CAAJ,EAAsC;SAC/BwC,MAAL;;;SAGKhD,CAAP;;;AAGF,AAAe,SAASqF,WAAT,CAAqBC,QAArB,EAA+BtF,CAA/B,EAAkC;WACtCzF,IAAT,CAAc,KAAd,EAAqB4F,IAArB,CAA0B,UAACxE,KAAD,EAAQ4J,GAAR,EAAgB;QAClCN,OAAOjF,EAAEuF,GAAF,CAAb;;mBAEeN,IAAf,EAAqBjF,CAArB;kBACciF,IAAd,EAAoBjF,CAApB;GAJF;;SAOOA,CAAP;;;AChCa,SAASwF,UAAT,CAAoBC,OAApB,EAA6BzF,CAA7B,EAAgC5F,GAAhC,EAAgD;MAAXsL,IAAW,uEAAJ,EAAI;;MACzDA,KAAK5J,MAAL,KAAgB,CAApB,EAAuB;WACd+E,cAAP;;;MAGEzG,GAAJ,EAAS;qBACwB+B,IAAIC,KAAJ,CAAUhC,GAAV,CADxB;QACCiC,QADD,cACCA,QADD;QACW6B,QADX,cACWA,QADX;;wCAEIwH,IAAX,sBAAiCrJ,QAAjC,UAA8C6B,QAA9C;;;IAGAwH,KAAKzI,IAAL,CAAU,GAAV,CAAF,EAAkBwI,OAAlB,EAA2BE,QAA3B,CAAoC/E,UAApC;;SAEOZ,CAAP;;;ACda,SAAS4F,aAAT,CAAuBH,OAAvB,EAAgCzF,CAAhC,EAA8C;MAAX0F,IAAW,uEAAJ,EAAI;;MACvDA,KAAK5J,MAAL,KAAgB,CAApB,EAAuB;WACdgF,iBAAP;;;;;IAKA4E,KAAKzI,IAAL,CAAU,GAAV,CAAF,EAAkBwI,OAAlB,EAA2B7C,GAA3B,OAAmChC,UAAnC,EAAiDoC,MAAjD;;SAEOhD,CAAP;;;ACZF;;;AAGA,AAAe,SAAS6F,aAAT,CAAoBJ,OAApB,EAA6BzF,CAA7B,EAAgC;MACvC8F,SAAS9F,EAAE,IAAF,EAAQyF,OAAR,CAAf;;MAEIK,OAAOhK,MAAP,GAAgB,CAApB,EAAuB;WACdqE,IAAP,CAAY,UAACxE,KAAD,EAAQ0E,IAAR;aAAiBL,EAAEK,IAAF,EAAQ2C,MAAR,EAAjB;KAAZ;GADF,MAEO;WACE7C,IAAP,CAAY,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;uBACbL,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0B,IAA1B;KADF;;;SAKKA,CAAP;;;ACNF,SAAS+F,qBAAT,CAA+BT,QAA/B,EAAyCtF,CAAzC,EAA4C;WACjCzF,IAAT,CAAc,GAAd,EAAmB4F,IAAnB,CAAwB,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;QACjCqE,QAAQC,SAAStE,IAAT,CAAd;;aAESA,IAAT,EAAe,iBAAgBqE,KAAhB,EAAuBhI,MAAvB,CAA8B,UAACC,GAAD,EAAM6D,IAAN,EAAe;UACtDa,mBAAmB5G,IAAnB,CAAwB+F,IAAxB,CAAJ,EAAmC;4BACrB7D,GAAZ,sBAAkB6D,IAAlB,EAAyBkE,MAAMlE,IAAN,CAAzB;;;aAGK7D,GAAP;KALa,EAMZ,EANY,CAAf;GAHF;;;UAaMiE,UAAN,EAAoB0E,QAApB,EAA8BU,WAA9B,CAA0CpF,UAA1C;;SAEO0E,QAAP;;;;;;;;;;AAUF,AAAe,SAASW,kBAAT,CAAyBX,QAAzB,EAAmCtF,CAAnC,EAAsC;;;;SAI5C+F,sBACLT,SAASY,MAAT,GAAkBpK,MAAlB,GAA2BwJ,SAASY,MAAT,EAA3B,GAA+CZ,QAD1C,EAELtF,CAFK,CAAP;;;ACxCa,SAASmG,WAAT,CAAqBb,QAArB,EAA+BtF,CAA/B,EAAkC;WACtCzF,IAAT,CAAc,GAAd,EAAmB4F,IAAnB,CAAwB,UAACxE,KAAD,EAAQiI,CAAR,EAAc;QAC9BwC,KAAKpG,EA
AE4D,CAAF,CAAX;QACIwC,GAAG7L,IAAH,CAAQ,aAAR,EAAuBuB,MAAvB,KAAkC,CAAlC,IAAuCsK,GAAGpM,IAAH,GAAUE,IAAV,OAAqB,EAAhE,EAAoEkM,GAAGpD,MAAH;GAFtE;;SAKOhD,CAAP;;;ACNF;;;;;;AAMA,AAAO,IAAM4B,kCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C,EAe3C,MAf2C,EAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,OAvB2C,EAwB3C,YAxB2C,EAyB3C,UAzB2C;AA0B3C,2BA1B2C;AA2B3C,OA3B2C,EA4B3C,eA5B2C,EA6B3C,SA7B2C,EA8B3C,QA9B2C,EA+B3C,QA/B2C,EAgC3C,KAhC2C,EAiC3C,OAjC2C,EAkC3C,UAlC2C,EAmC3C,SAnC2C,EAoC3C,UApC2C,EAqC3C,SArC2C,EAsC3C,OAtC2C,CAAtC;;;;;;;;;;;;;AAoDP,AAAO,IAAMC,kCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,wBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjC7E,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAMoJ,2BAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,8BACX,IAAI1L,MAAJ,QAAgByL,yBAAuBpJ,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;;;;AAMP,AAAO,IAAMsJ,4BAA0B,CACrC,CAAC,SAAD,EAAY,gBAAZ,CADqC,EAErC,CAAC,OAAD,EAAU,gBAAV,CAFqC,EAGrC,CAAC,QAAD,EAAW,gBAAX,CAHqC,EAIrC,CAAC,OAAD,EAAU,WAAV,CAJqC,EAKrC,CAAC,OAAD,EAAU,YAAV,CALqC,EAMrC,CAAC,OAAD,EAAU,YAAV,CANqC,CAAhC;;AASP,AAAO,IAAMC,gBAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,mBAAiB,IAAI7L,MAAJ,CAAW4L,cAAYvJ,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAMP,AAAO,IAAM8E,yBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,sBAAoB,IAAIpH,MAAJ,CAAWmH,uBAAqB9E,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAMyJ,sBAAoB,IAAI9L,MAAJ,CAAW,qBAAX,EAAkC,GAAlC,CAA1B;;;;;;AAMP,AAAO,IAAMqH,yBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,sBAAoB,IAAItH,MAAJ,CAAWqH,uBAAqBhF,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,AAAM0J;;;AAGb,AAAO,AAAMC;;;AAGb,AAAO,AAAMC;;;;AAIb,AAAO,AAAMxE;AAiDb,AAAO,AAAMC,AAAsCD;;;;;;AAMnD,IAAME,wBAAsBX,gCAA8B3E,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMuF,AAAkCD,AAAX;;AAEpC,IAAME,wBAAsBZ,gCAA8B5E,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMyF,AAAkCD,AAAX;;AAEpC,AAAO,AAAMqE,AAA8BrE,AAAhB,AAAyCF,AAAzC;;AAE3B,AAAO,IAAMwE,yBAAuB,IAAInM,MAAJ,CAAW,mBAAX,EAAgC,GAAhC,CAA7B;AACP,AAAO,IAAMoM,uBAAqB,IAAIpM,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAA3B;AACP,AAAO,IAAMqM,aAAW,IAAIrM,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CA
AjB,CAEP,AAAO,AAAMsM;;ACzSb;AACA,AAAe,SAASC,SAAT,CAAmB9G,IAAnB,EAAyB;MAChCwC,UAAUxC,KAAKG,IAAL,CAAU,OAAV,CAAhB;MACMsC,KAAKzC,KAAKG,IAAL,CAAU,IAAV,CAAX;MACI4G,QAAQ,CAAZ;;MAEItE,EAAJ,EAAQ;;QAEFd,oBAAkBvH,IAAlB,CAAuBqI,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;QAEEZ,oBAAkBzH,IAAlB,CAAuBqI,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;;;MAIAD,OAAJ,EAAa;QACPuE,UAAU,CAAd,EAAiB;;;UAGXpF,oBAAkBvH,IAAlB,CAAuBoI,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;UAEEX,oBAAkBzH,IAAlB,CAAuBoI,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;;;;;;QAOA4D,iBAAehM,IAAf,CAAoBoI,OAApB,CAAJ,EAAkC;eACvB,EAAT;;;;;;;QAOE6D,oBAAkBjM,IAAlB,CAAuBoI,OAAvB,CAAJ,EAAqC;eAC1B,EAAT;;;;SAIGuE,KAAP;;;ACnDF;;;AAGA,AAAe,SAASC,QAAT,CAAkB/G,KAAlB,EAAyB;SAC/BgH,WAAWhH,MAAME,IAAN,CAAW,OAAX,CAAX,KAAmC,IAA1C;;;ACJF;AACA,AAAe,SAAS+G,WAAT,CAAqBvN,IAArB,EAA2B;SACjC,CAACA,KAAKoB,KAAL,CAAW,IAAX,KAAoB,EAArB,EAAyBU,MAAhC;;;ACFF,IAAM0L,QAAQ,IAAI5M,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAd;;AAEA,AAAe,SAAS6M,WAAT,CAAqBC,UAArB,EAAgD;MAAfnE,OAAe,uEAAL,GAAK;;MACvDoE,SAASD,aAAa,EAA5B;;MAEIC,SAAS,CAAb,EAAgB;QACVC,oBAAJ;;;;;;;QAOIJ,MAAM/M,IAAN,CAAW8I,OAAX,CAAJ,EAAyB;oBACToE,SAAS,CAAvB;KADF,MAEO;oBACSA,SAAS,IAAvB;;;WAGKE,KAAKC,GAAL,CAASD,KAAKE,GAAL,CAASH,WAAT,EAAsB,CAAtB,CAAT,EAAmC,CAAnC,CAAP;;;SAGK,CAAP;;;ACjBF;;AAEA,AAAe,SAASI,iBAAT,CAAwB3H,IAAxB,EAA8B;MACvC+G,QAAQ,CAAZ;MACMpN,OAAOqG,KAAKrG,IAAL,GAAYE,IAAZ,EAAb;MACMwN,aAAa1N,KAAK8B,MAAxB;;;MAGI4L,aAAa,EAAjB,EAAqB;WACZ,CAAP;;;;WAIOH,YAAYvN,IAAZ,CAAT;;;;WAISyN,YAAYC,UAAZ,CAAT;;;;;;MAMI1N,KAAKuD,KAAL,CAAW,CAAC,CAAZ,MAAmB,GAAvB,EAA4B;aACjB,CAAT;;;SAGK6J,KAAP;;;AChCa,SAASa,QAAT,CAAkB3H,KAAlB,EAAyBN,CAAzB,EAA4BoH,KAA5B,EAAmC;QAC1C5G,IAAN,CAAW,OAAX,EAAoB4G,KAApB;SACO9G,KAAP;;;ACGa,SAAS4H,WAAT,CAAkB5H,KAAlB,EAAyBN,CAAzB,EAA4BmI,MAA5B,EAAoC;MAC7C;QACIf,QAAQgB,kBAAe9H,KAAf,EAAsBN,CAAtB,IAA2BmI,MAAzC;aACS7H,KAAT,EAAgBN,CAAhB,EAAmBoH,KAAnB;GAFF,CAGE,OAAOiB,CAAP,EAAU;;;;SAIL/H,KAAP;;;ACXF;AACA,AAAe,SAASgI,cAAT,CAAqBjI,IAArB,EAA2BL,CAA3B,EAA8BoH,KAA9B,EAAqC;MAC5ClB,SAAS7F,KAAK6F,MAAL,EAAf;MACIA,MAAJ,EAAY;gBACDA,MAAT,EAAiBlG,CAAjB,EAAoBoH,QAAQ,IAA5B;;;SAGK/G,IAAP;;;ACFF;;;AAGA,AAAe,SAAS+H,iBAAT,CAAwB9H,KAAxB,EAA+BN,CAA/B,EAAsD;MAApBuI,WAAoB,uEAAN,IAAM;;MAC/DnB,QAAQC,SAAS/G,KAAT,CAAZ;;MAEI8G,KAAJ,EAAW;WACFA,KAAP;;;UAGMoB,aAAUlI,KAAV,CAAR;;MAEIiI,WAAJ,EAAiB;aACNpB,UAAU7G,KAAV,CAAT;;;iBAGUA,KAAZ,EAAmBN,CAAnB,EAAsBoH,KAAtB;;SAEOA,KAAP;;;AClBF;;AAEA,AAAe,SAASoB,YAAT,CAAmBlI,KAAnB,EAA0B;mBACnBA,MAAM3B,GAAN,CAAU,CAAV,CADmB;MAC/B4E,OAD+B,cAC/BA,OAD+B;;;;;;;MAMnCwD,uBAAqBtM,IAArB,CAA0B8I,OAA1B,CAAJ,EAAwC;WAC/ByE,kBAAe1H,KAAf,CAAP;GADF,MAEO,IAAIiD,QAAQxH,WAAR,OAA0B,KAA9B,EAAqC;WACnC,CAAP;GADK,MAEA,IAAIiL,qBAAmBvM,IAAnB,CAAwB8I,OAAxB,CAAJ,EAAsC;WACpC,CAAP;GADK,MAEA,IAAI0D,WAASxM,IAAT,CAAc8I,OAAd,CAAJ,EAA4B;WAC1B,CAAC,CAAR;GADK,MAEA,IAAIA,QAAQxH,WAAR,OAA0B,IAA9B,EAAoC;WAClC,CAAC,CAAR;;;SAGK,CAAP;;;ACjBF,SAASqI,cAAT,CAAsB9D,KAAtB,EAA6BN,CAA7B,EAAgC;MAC1BM,MAAM3B,GAAN,CAAU,CAAV,CAAJ,EAAkB;qBACI2B,MAAM3B,GAAN,CAAU,CAAV,CADJ;QACR4E,OADQ,cACRA,OADQ;;QAGZA,YAAY,MAAhB,EAAwB;;uBAERjD,KAAd,EAAqBN,CAArB,EAAwB,KAAxB;;;;;AAKN,SAASyI,UAAT,CAAoBnI,KAApB,EAA2BN,CAA3B,EAA8BoH,KAA9B,EAAqC;MAC/B9G,KAAJ,EAAW;mBACIA,KAAb,EAAoBN,CAApB;gBACSM,KAAT,EAAgBN,CAAhB,EAAmBoH,KAAnB;;;;AAIJ,SAASsB,OAAT,CAAiB1I,CAAjB,EAAoBuI,WAApB,EAAiC;IAC7B,QAAF,EAAY3F,GAAZ,CAAgB,SAAhB,EAA2BzC,IAA3B,CAAgC,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;;;QAG3CC,QAAQN,EAAEK,IAAF,CAAZ;YACQ4H,SAAS3H,KAAT,EAAgBN,CAAhB,EAAmBoI,kBAAe9H,KAAf,EAAsBN,CAAtB,EAAyBuI,WAAzB,CAAnB,CAAR;;QAEMI,UAAUrI,MAAM4F,MAAN,EAAhB;QACM0C,WAAWJ,aAAUlI,KAAV,CAAjB;;eAEWqI,OAAX,EAAoB3I,CAApB,EAAuB4I,QAAvB,EAAiCL,WAAjC;QACII,OAAJ,EAAa;;;iBAGAA,QAAQzC,MAAR,EAAX,EAA6BlG,CAA7B,EAAgC4I,WAAW,CAA3C,EAA
8CL,WAA9C;;GAbJ;;SAiBOvI,CAAP;;;;;AAKF,AAAe,SAAS6I,eAAT,CAAsB7I,CAAtB,EAA6C;MAApBuI,WAAoB,uEAAN,IAAM;;;;4BAGlCO,OAAxB,CAAgC,gBAAqC;;QAAnCC,cAAmC;QAAnBC,aAAmB;;MAC9DD,cAAL,SAAuBC,aAAvB,EAAwC7I,IAAxC,CAA6C,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;kBACnDL,EAAEK,IAAF,EAAQ6F,MAAR,CAAe6C,cAAf,CAAT,EAAyC/I,CAAzC,EAA4C,EAA5C;KADF;GADF;;;;;;;UAWQA,CAAR,EAAWuI,WAAX;UACQvI,CAAR,EAAWuI,WAAX;;SAEOvI,CAAP;;;AC3DF;;;;;AAKA,AAAe,SAASiJ,aAAT,CAAuBC,UAAvB,EAAmCC,QAAnC,EAA6CnJ,CAA7C,EAAgD;MACzD,CAACkJ,WAAWhD,MAAX,GAAoBpK,MAAzB,EAAiC;WACxBoN,UAAP;;;MAGIE,wBAAwBvB,KAAKE,GAAL,CAAS,EAAT,EAAaoB,WAAW,IAAxB,CAA9B;MACME,cAAcrJ,EAAE,aAAF,CAApB;;aAEWkG,MAAX,GAAoB/B,QAApB,GAA+BhE,IAA/B,CAAoC,UAACxE,KAAD,EAAQ+H,OAAR,EAAoB;QAChD4F,WAAWtJ,EAAE0D,OAAF,CAAjB;;QAEI4C,4BAA0B7L,IAA1B,CAA+BiJ,QAAQH,OAAvC,CAAJ,EAAqD;aAC5C,IAAP;;;QAGIgG,eAAelC,SAASiC,QAAT,CAArB;QACIC,YAAJ,EAAkB;UACZD,SAAS3K,GAAT,CAAa,CAAb,MAAoBuK,WAAWvK,GAAX,CAAe,CAAf,CAAxB,EAA2C;oBAC7B6K,MAAZ,CAAmBF,QAAnB;OADF,MAEO;YACDG,eAAe,CAAnB;YACMC,UAAUC,YAAYL,QAAZ,CAAhB;;;;YAIII,UAAU,IAAd,EAAoB;0BACF,EAAhB;;;;;YAKEA,WAAW,GAAf,EAAoB;0BACF,EAAhB;;;;;YAKEJ,SAAS9I,IAAT,CAAc,OAAd,MAA2B0I,WAAW1I,IAAX,CAAgB,OAAhB,CAA/B,EAAyD;0BACvC2I,WAAW,GAA3B;;;YAGIS,WAAWL,eAAeE,YAAhC;;YAEIG,YAAYR,qBAAhB,EAAuC;iBAC9BC,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;SADF,MAEO,IAAI5F,QAAQH,OAAR,KAAoB,GAAxB,EAA6B;cAC5BsG,iBAAiBP,SAAStP,IAAT,EAAvB;cACM8P,uBAAuBpC,WAAWmC,cAAX,CAA7B;;cAEIC,uBAAuB,EAAvB,IAA6BJ,UAAU,IAA3C,EAAiD;mBACxCL,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;WADF,MAEO,IAAIQ,wBAAwB,EAAxB,IAA8BJ,YAAY,CAA1C,IACDvM,eAAe0M,cAAf,CADH,EACmC;mBACjCR,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;;;;;;WAMD,IAAP;GAnDF;;MAsDID,YAAYlF,QAAZ,GAAuBrI,MAAvB,KAAkC,CAAlC,IACFuN,YAAYlF,QAAZ,GAAuB4F,KAAvB,GAA+BpL,GAA/B,CAAmC,CAAnC,MAA0CuK,WAAWvK,GAAX,CAAe,CAAf,CAD5C,EAC+D;WACtDuK,UAAP;;;SAGKG,WAAP;;;AC7EF;;AAEA,AAAe,SAASW,mBAAT,CAA0BhK,CAA1B,EAA6B;MACtCkJ,mBAAJ;MACIC,WAAW,CAAf;;IAEE,SAAF,EAAahJ,IAAb,CAAkB,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;;QAE7BiG,4BAA0B7L,IAA1B,CAA+B4F,KAAKkD,OAApC,CAAJ,EAAkD;;;;QAI5CjD,QAAQN,EAAEK,IAAF,CAAd;QACM+G,QAAQC,SAAS/G,KAAT,CAAd;;QAEI8G,QAAQ+B,QAAZ,EAAsB;iBACT/B,KAAX;mBACa9G,KAAb;;GAXJ;;;;MAiBI,CAAC4I,UAAL,EAAiB;WACRlJ,EAAE,MAAF,KAAaA,EAAE,GAAF,EAAO+J,KAAP,EAApB;;;eAGWd,cAAcC,UAAd,EAA0BC,QAA1B,EAAoCnJ,CAApC,CAAb;;SAEOkJ,UAAP;;;ACjCF,UACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACEA,SAASe,mBAAT,CAA6B3J,KAA7B,EAAoCN,CAApC,EAAuCkK,MAAvC,EAA+C;;;;;MAKzC5J,MAAM6J,QAAN,CAAe,qBAAf,CAAJ,EAA2C;;;;MAIrC9M,UAAUtD,gBAAgBuG,MAAMtG,IAAN,EAAhB,CAAhB;;MAEIuN,YAAYlK,OAAZ,IAAuB,EAA3B,EAA+B;QACvB+M,SAASpK,EAAE,GAAF,EAAOM,KAAP,EAAcxE,MAA7B;QACMuO,aAAarK,EAAE,OAAF,EAAWM,KAAX,EAAkBxE,MAArC;;;QAGIuO,aAAcD,SAAS,CAA3B,EAA+B;YACvBpH,MAAN;;;;QAIItD,gBAAgBrC,QAAQvB,MAA9B;QACMwO,WAAWtK,EAAE,KAAF,EAASM,KAAT,EAAgBxE,MAAjC;;;;QAII4D,gBAAgB,EAAhB,IAAsB4K,aAAa,CAAvC,EAA0C;YAClCtH,MAAN;;;;QAII0G,UAAUC,YAAYrJ,KAAZ,CAAhB;;;;;QAKI4J,SAAS,EAAT,IAAeR,UAAU,GAAzB,IAAgChK,gBAAgB,EAApD,EAAwD;YAChDsD,MAAN;;;;;;QAMEkH,UAAU,EAAV,IAAgBR,UAAU,GAA9B,EAAmC;;;;UAI3BnG,UAAUjD,MAAM3B,GAAN,CAAU,CAAV,EAAa4E,OAAb,CAAqBxH,WAArB,EAAhB;UACMwO,aAAahH,YAAY,IAAZ,IAAoBA,YAAY,IAAnD;UACIgH,UAAJ,EAAgB;YACRC,eAAelK,MAAMmK,IAAN,EAArB;YACID,gBAAgBzQ,gBAAgByQ,aAAaxQ,IAAb,EAAhB,EAAqCuD,KAArC,CAA2C,CAAC,CAA5C,MAAmD,GAAvE,EAA4E;;;;;YAKxEyF,MAAN;;;;QAII0H,cAAc1K,EAAE,QAAF,EAAYM,KAAZ,EAAmBxE,MAAvC;;;QAGI4O,cAAc,CAAd,IAAmBhL,gBAAgB,GAAvC,EAA4C;YACpCsD,MAAN;;;;;;;;;;;;;AAaN,AAAe,SAAS2H,YAAT,CAAmBrF,QAAnB,EAA6BtF,CAA7B,EAAgC;IAC3CyB,wBAAF,EAA4B6D,QAA5B,EAAsCnF,IAAtC,CAA2C,UAACxE,KAAD,EAAQ0E,IAAR,EAAiB;QACpDC,QAAQN,EAAEK,IAAF,CAAd;;QAEIC,MAAM6J,QAAN,CAAevJ,UAAf,KAA8BN,MAAM/F,IAAN,OAAeqG,UAAf,EAA6B9E,MAA
7B,GAAsC,CAAxE,EAA2E;;QAEvEoO,SAAS7C,SAAS/G,KAAT,CAAb;QACI,CAAC4J,MAAL,EAAa;eACF9B,kBAAe9H,KAAf,EAAsBN,CAAtB,CAAT;eACSM,KAAT,EAAgBN,CAAhB,EAAmBkK,MAAnB;;;;QAIEA,SAAS,CAAb,EAAgB;YACRlH,MAAN;KADF,MAEO;;0BAEe1C,KAApB,EAA2BN,CAA3B,EAA8BkK,MAA9B;;GAhBJ;;SAoBOlK,CAAP;;;AC3Ga,SAAS4K,YAAT,CAAsBtF,QAAtB,EAAgCtF,CAAhC,EAA+C;MAAZ6K,KAAY,uEAAJ,EAAI;;IAC1DlJ,eAAF,EAAmB2D,QAAnB,EAA6BnF,IAA7B,CAAkC,UAACxE,KAAD,EAAQmP,MAAR,EAAmB;QAC7CC,UAAU/K,EAAE8K,MAAF,CAAhB;;;;;QAKI9K,EAAE+K,OAAF,EAAWzF,QAAX,EAAqB0F,OAArB,CAA6B,GAA7B,EAAkClP,MAAlC,KAA6C,CAAjD,EAAoD;aAC3CiP,QAAQ/H,MAAR,EAAP;;;;QAIEjJ,gBAAgBiG,EAAE8K,MAAF,EAAU9Q,IAAV,EAAhB,MAAsC6Q,KAA1C,EAAiD;aACxCE,QAAQ/H,MAAR,EAAP;;;;;QAKEmE,UAAUnH,EAAE8K,MAAF,CAAV,IAAuB,CAA3B,EAA8B;aACrBC,QAAQ/H,MAAR,EAAP;;;WAGK+H,OAAP;GArBF;;SAwBO/K,CAAP;;;AC5BF;;AAEA,AAAe,SAASiL,kBAAT,CAAyBxF,OAAzB,EAAkCzF,CAAlC,EAAqC;;;;MAI9CyE,iBAAczE,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CAAJ;MACIyE,iBAAczE,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CAAJ;;SAEOA,CAAP;;;ACJF,SAASkL,UAAT,CAAoBlL,CAApB,EAAuBmL,OAAvB,EAAgC3K,IAAhC,EAAsC4K,QAAtC,EAAgD;UACxC5K,IAAN,QAAe4K,QAAf,EAAyBjL,IAAzB,CAA8B,UAACC,CAAD,EAAIC,IAAJ,EAAa;QACnCqE,QAAQC,SAAStE,IAAT,CAAd;QACMjG,MAAMsK,MAAMlE,IAAN,CAAZ;;QAEIpG,GAAJ,EAAS;UACDiR,cAAclP,IAAI0C,OAAJ,CAAYsM,OAAZ,EAAqB/Q,GAArB,CAApB;cACQiG,IAAR,EAAcG,IAAd,EAAoB6K,WAApB;;GANJ;;;AAWF,AAAe,SAASC,oBAAT,CAA2BF,QAA3B,EAAqCpL,CAArC,EAAwC5F,GAAxC,EAA6C;GACzD,MAAD,EAAS,KAAT,EAAgB0O,OAAhB,CAAwB;WAAQoC,WAAWlL,CAAX,EAAc5F,GAAd,EAAmBoG,IAAnB,EAAyB4K,QAAzB,CAAR;GAAxB;;SAEOA,QAAP;;;ACtBK,SAAS1D,UAAT,CAAoB1N,IAApB,EAA0B;SACxBA,KAAKE,IAAL,GACKD,OADL,CACa,MADb,EACqB,GADrB,EAEK6B,MAFZ;;;;;;AAQF,AAAO,SAAS6N,WAAT,CAAqBrJ,KAArB,EAA4B;MAC3BiL,kBAAkB7D,WAAWpH,MAAMtG,IAAN,EAAX,CAAxB;;MAEMwR,WAAWlL,MAAM/F,IAAN,CAAW,GAAX,EAAgBP,IAAhB,EAAjB;MACMyR,aAAa/D,WAAW8D,QAAX,CAAnB;;MAEID,kBAAkB,CAAtB,EAAyB;WAChBE,aAAaF,eAApB;GADF,MAEO,IAAIA,oBAAoB,CAApB,IAAyBE,aAAa,CAA1C,EAA6C;WAC3C,CAAP;;;SAGK,CAAP;;;ACnBF;;AAEA,AAAe,SAASC,kBAAT,CACb1L,CADa,EAEb2L,SAFa,EAGbC,WAHa,EAKb;MADAjB,YACA,uEADY,IACZ;;MACMkB,aAAaF,UAAUG,MAAV,CAAiB;WAAQF,YAAYG,OAAZ,CAAoBC,IAApB,MAA8B,CAAC,CAAvC;GAAjB,CAAnB;;;;;;;;UAEWA,IAHX;;UAIQC,OAAO,MAAb;UACM1L,QAAQ,OAAd;;UAEM2L,QAAQlM,YAAUiM,IAAV,UAAmBD,IAAnB,QAAd;;;;;UAKMG,SACJD,MAAMjL,GAAN,CAAU,UAACtF,KAAD,EAAQ0E,IAAR;eAAiBL,EAAEK,IAAF,EAAQG,IAAR,CAAaD,KAAb,CAAjB;OAAV,EACM6L,OADN,GAEMN,MAFN,CAEa;eAAQ9R,SAAS,EAAjB;OAFb,CADF;;;;;;UASImS,OAAOrQ,MAAP,KAAkB,CAAtB,EAAyB;YACnBuQ,kBAAJ;;;YAGI1B,YAAJ,EAAe;sBACD2B,UAAUH,OAAO,CAAP,CAAV,EAAqBnM,CAArB,CAAZ;SADF,MAEO;sBACOmM,OAAO,CAAP,CAAZ;;;;aAGKE;;;;;sCA5BQR,UAAnB,4GAA+B;;;;;;;;;;;;;;;;;;;;;;SAiCxB,IAAP;;;AC3CF,SAASU,UAAT,CAAoBjM,KAApB,EAA2BkM,WAA3B,EAAwC;;;MAGlClM,MAAM6D,QAAN,GAAiBrI,MAAjB,GAA0B0Q,WAA9B,EAA2C;WAClC,KAAP;;;MAGEC,iBAAcnM,KAAd,CAAJ,EAA0B;WACjB,KAAP;;;SAGK,IAAP;;;;;;AAMF,AAAe,SAASoM,uBAAT,CACb1M,CADa,EAEb2M,SAFa,EAKb;MAFAH,WAEA,uEAFc,CAEd;MADAI,QACA,uEADW,IACX;;;;;;sCACuBD,SAAvB,4GAAkC;UAAvBzL,QAAuB;;UAC1BgL,QAAQlM,EAAEkB,QAAF,CAAd;;;;UAIIgL,MAAMpQ,MAAN,KAAiB,CAArB,EAAwB;YAChBwE,QAAQN,EAAEkM,MAAM,CAAN,CAAF,CAAd;;YAEIK,WAAWjM,KAAX,EAAkBkM,WAAlB,CAAJ,EAAoC;cAC9BnP,gBAAJ;cACIuP,QAAJ,EAAc;sBACFtM,MAAMtG,IAAN,EAAV;WADF,MAEO;sBACKsG,MAAMwE,IAAN,EAAV;;;cAGEzH,OAAJ,EAAa;mBACJA,OAAP;;;;;;;;;;;;;;;;;;;;SAMD,IAAP;;;AChDF;AACA,AAAe,SAASiP,SAAT,CAAmBtS,IAAnB,EAAyBgG,CAAzB,EAA4B;;;MAGnC6M,YAAY7M,aAAWhG,IAAX,cAA0BA,IAA1B,EAAlB;SACO6S,cAAc,EAAd,GAAmB7S,IAAnB,GAA0B6S,SAAjC;;;ACHa,SAASJ,gBAAT,CAAuBnM,KAAvB,EAA8B;MACrCiE,UAAUjE,MAAMiE,OAAN,GAAgB6H,OAAhB,EAAhB;MACMU,gBAAgBvI,QAAQhK,IAAR,CAAa,UAAC2L,MAAD,EAAY;QACvCxB,QAAQC,SAASuB,MAAT,CAAd;QACe6G,SAF8B,GAEZrI,KAFY,CAErCsI
,KAFqC;QAEnBlK,EAFmB,GAEZ4B,KAFY,CAEnB5B,EAFmB;;QAGvCC,aAAgBgK,SAAhB,SAA6BjK,EAAnC;WACOC,WAAWlG,QAAX,CAAoB,SAApB,CAAP;GAJoB,CAAtB;;SAOOiQ,kBAAkBG,SAAzB;;;ACXF;;;;AAIA,AAAe,SAASC,gBAAT,CAA0B5M,KAA1B,EAAiC;SACvCA,MAAMtG,IAAN,GAAaE,IAAb,GAAoB4B,MAApB,IAA8B,GAArC;;;ACHa,SAASqR,WAAT,CAAqBnN,CAArB,EAAwB;SAC9BA,EAAEmC,cAAF,EAAkBrG,MAAlB,GAA2B,CAAlC;;;ACHa,SAAS6I,QAAT,CAAkBtE,IAAlB,EAAwB;MAC7B+M,OAD6B,GACL/M,IADK,CAC7B+M,OAD6B;MACpBC,UADoB,GACLhN,IADK,CACpBgN,UADoB;;;MAGjC,CAACD,OAAD,IAAYC,UAAhB,EAA4B;QACpB3I,QAAQ,iBAAgB2I,UAAhB,EAA4B3Q,MAA5B,CAAmC,UAACC,GAAD,EAAMhB,KAAN,EAAgB;UACzD6E,OAAO6M,WAAW1R,KAAX,CAAb;;UAEI,CAAC6E,KAAKwL,IAAN,IAAc,CAACxL,KAAKD,KAAxB,EAA+B,OAAO5D,GAAP;;UAE3B6D,KAAKwL,IAAT,IAAiBxL,KAAKD,KAAtB;aACO5D,GAAP;KANY,EAOX,EAPW,CAAd;WAQO+H,KAAP;;;SAGK0I,OAAP;;;ACfa,SAASE,OAAT,CAAiBjN,IAAjB,EAAuBG,IAAvB,EAA6B+M,GAA7B,EAAkC;MAC3ClN,KAAK+M,OAAT,EAAkB;SACXA,OAAL,CAAa5M,IAAb,IAAqB+M,GAArB;GADF,MAEO,IAAIlN,KAAKgN,UAAT,EAAqB;SACrBG,YAAL,CAAkBhN,IAAlB,EAAwB+M,GAAxB;;;SAGKlN,IAAP;;;ACPa,SAASoN,QAAT,CAAkBpN,IAAlB,EAAwBqE,KAAxB,EAA+B;MACxCrE,KAAK+M,OAAT,EAAkB;SACXA,OAAL,GAAe1I,KAAf;GADF,MAEO,IAAIrE,KAAKgN,UAAT,EAAqB;WACnBhN,KAAKgN,UAAL,CAAgBvR,MAAhB,GAAyB,CAAhC,EAAmC;WAC5B4R,eAAL,CAAqBrN,KAAKgN,UAAL,CAAgB,CAAhB,EAAmBrB,IAAxC;;;qBAGctH,KAAhB,EAAuBoE,OAAvB,CAA+B,UAACjE,GAAD,EAAS;WACjC2I,YAAL,CAAkB3I,GAAlB,EAAuBH,MAAMG,GAAN,CAAvB;KADF;;;SAKKxE,IAAP;;;ACbF,mBACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACzBO,IAAMsN,UAAU,IAAI/S,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAhB;AACP,AAAO,IAAMgT,WAAW,IAAIhT,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB;;AAEP,AAAO,IAAMiT,iBAAiB,CAC5B,QAD4B,EAE5B,OAF4B,EAG5B,MAH4B,EAI5B5Q,IAJ4B,CAIvB,GAJuB,CAAvB;;ACIP;;;;;AAKA,AAAe,SAAS6Q,uBAAT,CAAiC9N,CAAjC,EAAoC;IAC/C,KAAF,EAASG,IAAT,CAAc,UAACC,CAAD,EAAImF,GAAJ,EAAY;QAClBb,QAAQC,SAASY,GAAT,CAAd;;qBAEgBb,KAAhB,EAAuBoE,OAAvB,CAA+B,UAACtI,IAAD,EAAU;UACjCD,QAAQmE,MAAMlE,IAAN,CAAd;;UAEIA,SAAS,KAAT,IAAkBmN,QAAQlT,IAAR,CAAa8F,KAAb,CAAlB,IACAqN,SAASnT,IAAT,CAAc8F,KAAd,CADJ,EAC0B;UACtBgF,GAAF,EAAO/E,IAAP,CAAY,KAAZ,EAAmBD,KAAnB;;KALJ;GAHF;;SAaOP,CAAP;;;ACxBF,SAAS+N,SAAT,CAAmBpS,KAAnB,EAA0B0E,IAA1B,EAAgC;SACvBA,KAAK4L,IAAL,KAAc,SAArB;;;AAGF,SAAS+B,aAAT,CAAuBhO,CAAvB,EAA0B;IACtBiO,IAAF,GAAS1T,IAAT,CAAc,GAAd,EACSwK,QADT,GAES+G,MAFT,CAEgBiC,SAFhB,EAGS/K,MAHT;;SAKOhD,CAAP;;;AAGF,AAAe,SAASkO,KAAT,CAAelO,CAAf,EAAkB;IAC7B6N,cAAF,EAAkB7K,MAAlB;;MAEIgL,cAAchO,CAAd,CAAJ;SACOA,CAAP;;;ACRF,IAAMmO,WAAW;;;;;;;;QAAA,kBAQF/T,GARE,EAQGgU,gBARH,EAQqBlS,SARrB,EAQgC;;;;;;;;;oBAAA;;mBAGzCkS,gBAHyC;;;;;2BAAA,GAIrB;+BACL,IADK;4BAER,GAFQ;yBAGX;kCACS,WADT;oCAEW;;eATqB;;;uBAalC,EAAEnP,MAAMmP,gBAAR,EAA0BpP,UAAUqP,aAApC,EAAT;;;;;;qBAEevO,gBAAc1F,GAAd,EAAmB8B,SAAnB,CAf4B;;;oBAAA;;;mBAkBzCoS,OAAO/O,KAlBkC;;;;;qBAmBpCgP,MAAP,GAAgB,IAAhB;+CACOD,MApBoC;;;+CAuBtC,MAAKE,WAAL,CAAiBF,MAAjB,CAvBsC;;;;;;;;;GARhC;aAAA,6BAkC0B;QAArBjR,OAAqB,QAA3B4B,IAA2B;QAAZD,QAAY,QAAZA,QAAY;QACfS,WADe,GACCT,SAASQ,OADV,CAC/B,cAD+B;;;;;QAKnC,CAACC,YAAY5C,QAAZ,CAAqB,MAArB,CAAD,IACA,CAAC4C,YAAY5C,QAAZ,CAAqB,MAArB,CADL,EACmC;YAC3B,IAAIyC,KAAJ,CAAU,qCAAV,CAAN;;;QAGEU,IAAI,KAAKyO,SAAL,CAAe,EAAEpR,gBAAF,EAAWoC,wBAAX,EAAf,CAAR;;QAEIO,EAAEiO,IAAF,GAAS9J,QAAT,GAAoBrI,MAApB,KAA+B,CAAnC,EAAsC;YAC9B,IAAIwD,KAAJ,CAAU,kCAAV,CAAN;;;QAGEoB,kBAAkBV,CAAlB,CAAJ;QACI8N,wBAAwB9N,CAAxB,CAAJ;QACIkO,MAAMlO,CAAN,CAAJ;;WAEOA,CAAP;GAtDa;WAAA,4BAyDqB;QAAxB3C,OAAwB,SAAxBA,OAAwB;QAAfoC,WAAe,SAAfA,WAAe;;QAC5B/B,WAAWF,YAAYiC,WAAZ,CAAjB;QACIiP,iBAAiB9Q,MAAM+Q,MAAN,CAAatR,OAAb,EAAsBK,QAAtB,CAArB;QACIsC,IAAI3B,QAAQuQ,IAAR,CAAaF,cAAb,CAAR;;;QAGMG,kBAAkB7O,EAAE,+BAAF,EAAmCQ
,IAAnC,CAAwC,SAAxC,CAAxB;QACMsO,iBAAiBtR,YAAYqR,eAAZ,CAAvB;;;QAGIC,mBAAmBpR,QAAvB,EAAiC;uBACdE,MAAM+Q,MAAN,CAAatR,OAAb,EAAsByR,cAAtB,CAAjB;UACIzQ,QAAQuQ,IAAR,CAAaF,cAAb,CAAJ;;;WAGK1O,CAAP;;CAxEJ,CA4EA;;ACvFA,IAAM+O,QAAQ,SAARA,KAAQ,CAACC,SAAD,EAAYC,OAAZ;SACZA,QAAQvS,MAAR,CAAe,UAACC,GAAD,EAAMuS,MAAN,EAAiB;QAC1BA,MAAJ,IAAcF,SAAd;WACOrS,GAAP;GAFF,EAGG,EAHH,CADY;CAAd;;AAOA,AAAe,SAASwS,qBAAT,CAA+BH,SAA/B,EAA0C;SAChDA,UAAUI,gBAAV,GACLL,MAAMC,SAAN,GAAkBA,UAAUE,MAA5B,4BAAuCF,UAAUI,gBAAjD,GADK,GAGLL,MAAMC,SAAN,EAAiB,CAACA,UAAUE,MAAX,CAAjB,CAHF;;;ACRK,IAAMG,mBAAmB;UACtB,cADsB;WAErB;;;;eAII,CACT,wBADS,CAJJ;;;WASA,EATA;;;gBAaK;gBACA;;GAhBgB;;UAoBtB;eACK,CACT,mBADS;GArBiB;;SA0BvB;eACM,CACT,gBADS;GA3BiB;;kBAgCd;eACH,CACT,kBADS;;CAjCR;;ACAA,IAAMC,iBAAiB;UACpB,WADoB;WAEnB;;eAEI,CACT,qBADS,EAET,cAFS,EAGT,iBAHS,CAFJ;;;WASA,CACL,KADK,EAEL,uBAFK,CATA;;;;;;;;gBAoBK;;UAEN,IAFM;;;gBAKA,kBAAChP,KAAD,EAAQN,CAAR,EAAc;YAChBuP,YAAYvP,EAAE1B,OAAF,GAAY0B,EAAEM,MAAMtG,IAAN,EAAF,CAAZ,GAA8BsG,MAAM6D,QAAN,EAAhD;YACIoL,UAAUzT,MAAV,KAAqB,CAArB,IAA0ByT,UAAU5Q,GAAV,CAAc,CAAd,MAAqBsO,SAA/C,IACFsC,UAAU5Q,GAAV,CAAc,CAAd,EAAiB4E,OAAjB,CAAyBxH,WAAzB,OAA2C,KAD7C,EACoD;iBAC3C,QAAP;;;eAGK,IAAP;;;GAlCsB;;SAuCrB;eACM,CACT,uBADS,EAET,qBAFS,EAGT,IAHS;GAxCe;;UA+CpB;eACK,CACT,aADS,EAET,sBAFS;GAhDe;;OAsDvB;eACQ,CACT,sBADS;GAvDe;;kBA4DZ;eACH,CACT,CAAC,kCAAD,EAAqC,UAArC,CADS,EAET,wBAFS;;CA7DR;;ACAA,IAAMyT,qBAAqB;UACxB,eADwB;WAEvB;eACI,CACT,kBADS,CADJ;;oBAKS,KALT;;;gBAQK;sBACM,oBAAClP,KAAD,EAAW;YACnBqI,UAAUrI,MAAMiE,OAAN,CAAc,UAAd,CAAhB;;YAEIoE,QAAQxE,QAAR,CAAiB,KAAjB,EAAwBrI,MAAxB,KAAmC,CAAvC,EAA0C;kBAChC2T,OAAR,CAAgBnP,KAAhB;;OALM;0BAQU,YARV;kBASE;KAjBP;;;WAqBA,CACL,iBADK,EAEL,oCAFK,EAGL,MAHK,EAIL,SAJK;;GAvBuB;;UAgCxB,wBAhCwB;;SAkCzB;eACM,CACT,UADS;GAnCmB;;kBAwChB;eACH,CACT,sBADS;;;CAzCR;;ACAA,IAAMoP,mBAAmB;UACtB,aADsB;;WAGrB;gBACK;;;;;+BAKe,2BAACpP,KAAD,EAAQN,CAAR,EAAc;YAC/B2P,SAASrP,MAAM/F,IAAN,CAAW,QAAX,CAAf;YACMqV,kBAAkB5P,EAAE,iCAAF,CAAxB;wBACgBwJ,MAAhB,CAAuBmG,MAAvB;cACM7L,WAAN,CAAkB8L,eAAlB;OATQ;;;;SAcP;KAfE;;eAkBI,CACT,uBADS,CAlBJ;;oBAsBS,KAtBT;;WAwBA,CACL,qBADK,EAEL,QAFK,EAGL,sBAHK;GA3BqB;;UAkCtB;eACK,CACT,kCADS;GAnCiB;;kBAwCd;eACH,CACT,CAAC,4CAAD,EAA+C,cAA/C,CADS;;;CAzCR;;ACAA,IAAMC,mBAAmB;UACtB,iBADsB;;SAGvB;eACM,CACT,eADS,EAET,yBAFS,EAGT,aAHS;GAJiB;;UAWtB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS,EAET,WAFS,EAGT,SAHS;GAZiB;;WAmBrB;eACI,CACT,cADS,EAET,eAFS,CADJ;;gBAMK;oBACI,kBAACvP,KAAD,EAAW;YACnBwP,MAAMxP,MAAME,IAAN,CAAW,KAAX,CAAV;;;;;;;;;;YAUM2E,QAAQ,GAAd;;cAEM2K,IAAI7V,OAAJ,CAAY,UAAZ,EAAwBkL,KAAxB,CAAN;cACM3E,IAAN,CAAW,KAAX,EAAkBsP,GAAlB;;KArBG;;WAyBA,CACL,KADK,EAEL,qBAFK,EAGL,2BAHK,EAIL,kBAJK,EAKL,mBALK,EAML,QANK,EAOL,kBAPK,EAQL,SARK,EASL,WATK,EAUL,eAVK,EAWL,YAXK,EAYL,qBAZK;GA5CqB;;kBA4Dd;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS;GA7DiB;;kBAkEd;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAnEiB;;OAwEzB,IAxEyB;;iBA0Ef,IA1Ee;;WA4ErB;CA5EJ;;ACAP;;AAEA,AAAO,IAAMC,uBAAuB;UAC1B,qBAD0B;SAE3B;eACM,CACT,QADS;GAHqB;;UAQ1B;eACK,CACT,0DADS;GATqB;;WAczB;eACI,CACT,CAAC,gCAAD,EAAmC,eAAnC,CADS,EAET,eAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,cADK,EAEL,UAFK;GA5ByB;;kBAkClB;eACH,CACT,CAAC,gCAAD,EAAmC,UAAnC,CADS;GAnCqB;;kBAwClB,IAxCkB;;iBA0CnB,IA1CmB;;WA4CzB;CA5CJ;;ACFP;;;AAGA,AAAO,IAAMC,qBAAqB;UACxB,mBADwB;SAEzB;eACM,CACT,UADS;GAHmB;;UAQxB;eACK,CACT,eADS;GATmB;;WAcvB;eACI,CACT,iBADS,EAET,iBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;GA5BuB;;kBAiChB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS,EAET,CAAC,gCAAD,EAAmC,SAAnC,CAFS,CADG;;cAMJ;GAvCoB;;kBA0ChB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3CmB;;OAgD3B;eACQ,CACT,MADS,EAET,QAFS;GAjDmB;;iBAuDjB,IAvDiB;;WAyDvB;CAzDJ;;ACHP;;;AAGA,AAAO,IAAM
C,iBAAiB;UACpB,eADoB;SAErB;eACM,CACT,eADS;GAHe;;UASpB;eACK,CACT,iBADS;GAVe;;WAgBnB;eACI,CACT,iBADS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,kBADK,EAEL,sBAFK;GA9BmB;;kBAqCZ;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS;GAtCe;;kBA2CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5Ce;;OAiDvB;eACQ;GAlDe;;iBAsDb,IAtDa;;WAwDnB;CAxDJ;;ACHP;;;AAGA,AAAO,IAAMC,eAAe;UAClB,aADkB;SAEnB;eACM,CACT,IADS;GAHa;;UASlB;eACK,CACT,qBADS;GAVa;;WAgBjB;eACI,CACT,cADS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,cADK;GA9BiB;;kBAoCV;eACH,CACT,WADS;GArCa;;kBA0CV;eACH;GA3Ca;;OAgDrB;eACQ;GAjDa;;iBAqDX,IArDW;;WAuDjB;CAvDJ;;ACHP;;;AAGA,AAAO,IAAMC,iBAAiB;UACpB,eADoB;SAErB;eACM,CACT,sBADS;GAHe;;UASpB;eACK,CACT,oBADS;GAVe;;WAgBnB;eACI;;qBAAA,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,iBADK;GA9BmB;;kBAoCZ;eACH,CACT,CAAC,qBAAD,EAAwB,UAAxB,CADS;GArCe;;kBA0CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3Ce;;OAgDvB;eACQ;;;GAjDe;;iBAsDb,IAtDa;;WAwDnB;CAxDJ;;ACHP;;;AAGA,AAAO,IAAMC,oBAAoB;UACvB,kBADuB;SAExB;eACM,CACT,qBADS;GAHkB;;UASvB;eACK,CACT,gCADS,EACyB,gBADzB;GAVkB;;WAgBtB;eACI,CACT,CAAC,+BAAD,EAAkC,gBAAlC,CADS,EAET,gBAFS,CADJ;;oBAMS,KANT;;;;gBAUK;UACN,GADM;;0CAG0B,yCAAC9P,KAAD,EAAW;YACzCA,MAAM+P,GAAN,CAAU,KAAV,KAAoB/P,MAAM+P,GAAN,CAAU,+BAAV,CAAxB,EAAoE;iBAC3D,QAAP;;;eAGK,IAAP;OARQ;;2EAYR;KAtBG;;;;;WA4BA,CACL,oBADK,EAEL,uEAFK,EAGL,YAHK,EAIL,QAJK;GA5CsB;;kBAoDf;eACH,CACT,gBADS;GArDkB;;kBA0Df;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3DkB;;OAgE1B;eACQ;GAjEkB;;iBAqEhB,IArEgB;;WAuEtB;CAvEJ;;ACHP;;;AAGA,AAAO,IAAMC,iBAAiB;UACpB,kBADoB;SAErB;eACM,CACT,gBADS;GAHe;;UASpB;eACK,CACT,eADS,EACQ,KADR;GAVe;;WAgBnB;eACI,CACT,eADS,EAET,gBAFS,CADJ;;;;gBASK,EATL;;;;;WAeA;GA/BmB;;kBAoCZ;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GArCe;;kBA0CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3Ce;;OAgDvB;eACQ;GAjDe;;iBAqDb,IArDa;;WAuDnB;CAvDJ;;ACHP;;;AAGA,AAAO,IAAMC,wBAAwB;UAC3B,sBAD2B;SAE5B;eACM,CACT,eADS;GAHsB;;UAS3B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVsB;;WAgB1B;eACI;;uBAAA,EAGT,kBAHS,CADJ;;;;gBASK,EATL;;;;;WAeA;GA/B0B;;kBAoCnB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GArCsB;;iBA0CpB,IA1CoB;;WA4C1B;CA5CJ;;ACHP;;;AAGA,AAAO,IAAMC,oBAAoB;UACvB,kBADuB;SAExB;eACM;;KAER,uBAAD,EAA0B,OAA1B,CAFS;GAHkB;;UASvB;eACK,CACT,oCADS;GAVkB;;WAetB;eACI;;yBAAA,EAGT,gBAHS,EAGS,aAHT,EAIT,aAJS,CADJ;;;;gBAUK,EAVL;;;;;WAgBA,CACL,YADK;GA/BsB;;kBAoCf;eACH,CACT,CAAC,+CAAD,EAAkD,UAAlD,CADS;GArCkB;;kBA2Cf;eACH;;KAER,uBAAD,EAA0B,OAA1B,CAFS;GA5CkB;;OAmD1B;eACQ;GApDkB;;iBAwDhB,IAxDgB;;WA0DtB;CA1DJ;;ACHA,IAAMC,oBAAoB;UACvB,cADuB;;oBAGb,CAChB,aADgB,EAEhB,gBAFgB,EAGhB,YAHgB,EAIhB,aAJgB,EAKhB,cALgB,EAMhB,WANgB,CAHa;;SAYxB;eACM,CACT,aADS;GAbkB;;UAkBvB;eACK,CACT,SADS;GAnBkB;;WAwBtB;eACI,CACT,eADS,EAET,gBAFS,CADJ;;;;gBAQK;0DAC0C,8CAACnQ,KAAD,EAAW;YACvDoQ,YAAYpQ,MAAME,IAAN,CAAW,IAAX,EAAiBhF,KAAjB,CAAuB,UAAvB,EAAmC,CAAnC,CAAlB;cACMgF,IAAN,CAAW,KAAX,qCAAmDkQ,SAAnD;;KAXG;;;;;WAkBA,CACL,YADK,EAEL,WAFK;GA1CsB;;kBAgDf;eACH,CACT,CAAC,wBAAD,EAA2B,UAA3B,CADS;GAjDkB;;kBAsDf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvDkB;;OA4D1B;eACQ;;;GA7DkB;;iBAkEhB;eACF;;;GAnEkB;;WAwEtB;eACI;;;;CAzER;;ACAP;;;AAGA,AAAO,IAAMC,yBAAyB;UAC5B,uBAD4B;SAE7B;eACM,CACT,kBADS;GAHuB;;UAQ5B;eACK,CACT,uBADS;GATuB;;WAc3B;eACI,CACT,2BADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;GA3B2B;;kBAgCpB;eACH,CACT,CAAC,8BAAD,EAAiC,OAAjC,CADS;GAjCuB;;kBAsCpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvCuB;;OA4C/B;eACQ;GA7CuB;;iBAiDrB;eACF;;;GAlDuB;;WAuD3B;eACI;;;;CAxDR;;ACHP;;;AAGA,AAAO,IAAMC,4BAA4B;UAC/B,0BAD+B;SAEhC;eACM,CACT,aADS;GAH0B;;UAQ/B;eACK,CACT,mBADS;GAT0B;;WAc9B;eACI,CACT,mBADS,CADJ;;;;gBAOK;wDACwC,+CAACtQ,KAAD,EAAQN,CAAR,EAAc;YACxD6Q,OAAOC,KAAK1U,KAAL,CAAWkE,MAAME,IAAN,CAAW,YAAX,CAAX,CAAb;YACQsP,GAFsD,GAE
9Ce,KAAKE,OAAL,CAAa,CAAb,CAF8C,CAEtDjB,GAFsD;;YAGxD7K,OAAOjF,EAAE,SAAF,EAAaQ,IAAb,CAAkB,KAAlB,EAAyBsP,GAAzB,CAAb;cACMhM,WAAN,CAAkBmB,IAAlB;;KAZG;;;;;WAmBA;GAjC8B;;kBAsCvB;eACH,CACT,CAAC,kCAAD,EAAqC,UAArC,CADS;GAvC0B;;kBA4CvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7C0B;;OAkDlC;eACQ;GAnD0B;;iBAuDxB;eACF;;;GAxD0B;;WA6D9B;eACI;;;;CA9DR;;ACHA,IAAM+L,kBAAkB;UACrB,YADqB;;oBAGX,CAChB,4BADgB,CAHW;;SAOtB;eACM,CACT,IADS;GARgB;;UAarB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAdgB;;WAmBpB;eACI,CACT,CAAC,kBAAD,CADS,EAET,kBAFS,EAGT,yBAHS,CADJ;;;;gBASK;;cAEF,gBAAC1Q,KAAD,EAAW;YACX2Q,OACJ,kEADF;YAEMC,QAAQC,mBAAmB7Q,MAAME,IAAN,CAAW,gBAAX,CAAnB,CAAd;;YAEIyQ,KAAKxW,IAAL,CAAUyW,KAAV,CAAJ,EAAsB;6BACGA,MAAM9V,KAAN,CAAY6V,IAAZ,CADH;;cACb7Q,CADa;cACVsQ,SADU;;;gBAEdlQ,IAAN,CAAW,KAAX,qCAAmDkQ,SAAnD;cACM/H,UAAUrI,MAAMiE,OAAN,CAAc,QAAd,CAAhB;cACM6M,WAAWzI,QAAQpO,IAAR,CAAa,YAAb,CAAjB;kBACQ8W,KAAR,GAAgB7H,MAAhB,CAAuB,CAAClJ,KAAD,EAAQ8Q,QAAR,CAAvB;;OAZM;;;cAiBF,gBAAC9Q,KAAD,EAAW;;YAEbA,MAAM/F,IAAN,CAAW,QAAX,EAAqBuB,MAArB,GAA8B,CAAlC,EAAqC;;YAE/BmJ,OAAO3E,MAAM/F,IAAN,CAAW,KAAX,EAAkBgD,KAAlB,CAAwB,CAAC,CAAzB,EAA4B,CAA5B,CAAb;YACM6T,WAAW9Q,MAAM/F,IAAN,CAAW,YAAX,CAAjB;cACM8W,KAAN,GAAc7H,MAAd,CAAqB,CAACvE,IAAD,EAAOmM,QAAP,CAArB;;KAhCG;;;;;WAuCA;GA1DoB;;kBA+Db;eACH,CACT,CAAC,gBAAD,EAAmB,UAAnB,CADS;GAhEgB;;kBAqEb;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtEgB;;OA2ExB;eACQ;;;GA5EgB;;iBAiFd;eACF;;;GAlFgB;;WAuFpB;eACI;;;;CAxFR;;ACAA,IAAME,qBAAqB;UACxB,aADwB;;SAGzB;eACM,CACT,wBADS,EAET,IAFS,EAGT,WAHS;GAJmB;;UAWxB,WAXwB;;kBAahB;eACH,CACT,sBADS,CADG;;cAKJ;GAlBoB;;OAqB3B;eACQ;;;GAtBmB;;kBA2BhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BmB;;WAiCvB;eACI,CACT,kBADS,EAET,gBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,gBADK;;CA/CJ;;ACAA,IAAMC,gCAAgC;UACnC,wBADmC;;SAGpC;eACM,CACT,IADS,EAET,0BAFS;GAJ8B;;UAUnC;eACK,CACT,YADS;GAX8B;;kBAgB3B;eACH,CACT,CAAC,yCAAD,EAA4C,SAA5C,CADS;GAjB8B;;OAsBtC;eACQ;GAvB8B;;kBA2B3B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5B8B;;WAiClC;eACI,CACT,eADS,CADJ;;;;gBAOK;4BACY,0BAACjR,KAAD,EAAW;YAC3BA,MAAM+P,GAAN,CAAU,kBAAV,EAA8BvU,MAA9B,GAAuC,CAA3C,EAA8C;iBACrC,QAAP;;;cAGIkH,MAAN;eACO,IAAP;OAPQ;qBASK;KAhBV;;;;;WAsBA,CACL,oBADK,EAEL,yBAFK;;CAvDJ;;ACAA,IAAMwO,gCAAgC;UACnC,wBADmC;;SAGpC;eACM,CACT,oBADS;GAJ8B;;UASnC;eACK,CACT,iCADS;GAV8B;;kBAe3B;eACH,CACT,CAAC,oCAAD,EAAuC,OAAvC,CADS,EAET,CAAC,qCAAD,EAAwC,OAAxC,CAFS;GAhB8B;;OAsBtC;eACQ,CACT,uBADS;GAvB8B;;kBA4B3B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7B8B;;WAkClC;eACI,CACT,iBADS,CADJ;;oBAKS,KALT;;;;gBASK;;;;;KATL;;;;;WAmBA,CACL,aADK,EAEL,YAFK,EAGL,cAHK,EAIL,cAJK,EAKL,oBALK,EAML,kBANK;;CArDJ;;ACAA,IAAMC,0BAA0B;UAC7B,iBAD6B;;SAG9B;eACM,CACT,qBADS,EAET,kCAFS;GAJwB;;UAU7B;eACK,CACT,iBADS,EAET,mCAFS;GAXwB;;kBAiBrB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS,CADG;;cAKJ;GAtByB;;OAyBhC;eACQ,CACT,oBADS;GA1BwB;;kBA+BrB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAhCwB;;WAqC5B;eACI,CACT,CAAC,gBAAD,EAAmB,kBAAnB,CADS,EAET,CAAC,eAAD,EAAkB,mCAAlB,CAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,OADK;;CAnDJ;;ACAA,IAAMC,uBAAuB;UAC1B,eAD0B;;SAG3B;eACM,CACT,gBADS;GAJqB;;UAS1B;eACK,CACT,WADS;GAVqB;;kBAelB;eACH,CACT,CAAC,mBAAD,EAAsB,OAAtB,CADS,CADG;;cAKJ;GApBsB;;OAuB7B;eACQ,CACT,eADS;GAxBqB;;kBA6BlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BqB;;WAmCzB;eACI,CACT,YADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,iBADK;;CAhDJ;;ACAA,IAAMC,0BAA0B;UAC7B,kBAD6B;;oBAGnB,CAAC,iBAAD,CAHmB;;SAK9B;eACM,CACT,IADS;GANwB;;UAW7B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAZwB;;kBAiBrB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAlBwB;;OAuBhC;eACQ,CACT,UADS;GAxBwB;;kBA6BrB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BwB;;WAmC5B;eACI;;KAER,wBAAD,EAA2B,gBAA3B,EAA6C,kBAA7C
,CAFS;;KAIR,gBAAD,EAAmB,kBAAnB,CAJS;;2BAAA;;yBAAA,CADJ;;;gBAaK;gBACA,kBAACrR,KAAD,EAAW;YACbiP,YAAYjP,MAAM6D,QAAN,EAAlB;YACIoL,UAAUzT,MAAV,KAAqB,CAArB,IAA0ByT,UAAU5Q,GAAV,CAAc,CAAd,EAAiB4E,OAAjB,KAA6B,KAA3D,EAAkE;iBACzD,MAAP;;;eAGK,IAAP;;KApBG;;;;;WA2BA,CACL,QADK,EAEL,qBAFK;;CA9DJ;;ACAA,IAAMqO,qBAAqB;UACxB,aADwB;;SAGzB;eACM,CACT,gBADS,EAET,IAFS;GAJmB;;UAUxB;eACK,CACT,2BADS;GAXmB;;kBAgBhB;eACH,CACT,CAAC,sBAAD,EAAyB,OAAzB,CADS;GAjBmB;;kBAsBhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvBmB;;WA4BvB;eACI;;KAER,0BAAD,EAA6B,eAA7B,CAFS;;mBAAA,EAKT,6BALS,CADJ;;;;gBAWK;gEACgD,wDAACtR,KAAD,EAAW;YAC7DuR,QAAQvR,MAAMwE,IAAN,EAAd;YACI+M,KAAJ,EAAW;iBACF,GAAP;;;eAGK,IAAP;OAPQ;;;;6BAYa,2BAACvR,KAAD,EAAW;YAC5BA,MAAM+P,GAAN,CAAU,GAAV,CAAJ,EAAoB;cACd/P,MAAMtG,IAAN,GAAaE,IAAb,OAAwBoG,MAAM/F,IAAN,CAAW,GAAX,EAAgBP,IAAhB,GAAuBE,IAAvB,EAA5B,EAA2D;kBACnD8I,MAAN;;;OAfI;;kCAoBkB;;KA/BvB;;;;;WAsCA;;CAlEJ;;ACAA,IAAM8O,qBAAqB;UACxB,aADwB;;SAGzB;eACM,CACT,qBADS;GAJmB;;UASxB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVmB;;kBAehB;eACH,CACT,0BADS,CADG;;cAKJ;GApBoB;;OAuB3B;eACQ;;;GAxBmB;;kBA6BhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BmB;;WAmCvB;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAhDJ;;ACAA,IAAMC,yBAAyB;UAC5B,iBAD4B;;SAG7B;eACM,CACT,cADS,EAET,0BAFS;GAJuB;;UAU5B;eACK,CACT,eADS;GAXuB;;kBAgBpB;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS,CADG;;cAKJ;GArBwB;;OAwB/B;eACQ;;;GAzBuB;;kBA8BpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA/BuB;;WAoC3B;oBACS,KADT;;eAGI,CACT,CAAC,aAAD,EAAgB,kBAAhB,CADS,CAHJ;;;;gBASK;qBACK,mBAACzR,KAAD,EAAQN,CAAR,EAAc;YACrBgS,UAAUhS,EAAE,0BAAF,EAA8BQ,IAA9B,CAAmC,OAAnC,CAAhB;cACMsE,IAAN,6DAC+CkN,OAD/C;;KAZG;;;;;WAqBA;;CAzDJ;;ACAA,IAAMC,6BAA6B;UAChC,qBADgC;;SAGjC;eACM,CACT,oBADS;GAJ2B;;UAShC;eACK,CACT,UADS;GAV2B;;kBAexB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhB2B;;OAqBnC;eACQ,CACT,sBADS;GAtB2B;;kBA2BxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5B2B;;WAiC/B;eACI,CACT,wBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,iBADK,EAEL,cAFK;;CA9CJ;;ACAA,IAAMC,0BAA0B;UAC7B,kBAD6B;;SAG9B;eACM,CACT,iBADS;GAJwB;;UAS7B;eACK,CACR,CAAC,qBAAD,EAAwB,OAAxB,CADQ;GAVwB;;kBAerB;eACH,CACR,CAAC,qCAAD,EAAwC,OAAxC,CADQ;GAhBwB;;OAqBhC;eACQ,CACT,0BADS;GAtBwB;;kBA2BrB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BwB;;WAiC5B;eACI,CACT,qBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA9CJ;;ACAA,IAAMC,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM;;oBAAA;;;sBAAA;;;4BAAA;GAJyB;;UAgB9B;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS,EAET,uBAFS;;;YAAA;;;aAAA;GAjByB;;kBA6BtB;eACH,CACT,CAAC,mBAAD,EAAsB,UAAtB,CADS,EAET,CAAC,gBAAD,EAAmB,UAAnB,CAFS,EAGT,CAAC,mBAAD,EAAsB,OAAtB,CAHS,EAIT,CAAC,+BAAD,EAAkC,OAAlC,CAJS;GA9ByB;;OAsCjC;eACQ;GAvCyB;;kBA2CtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5CyB;;WAiD7B;eACI,CACT,wBADS;;;KAIR,oBAAD,CAJS;;;gBAAA,CADJ;;;;gBAaK,EAbL;;;;;WAmBA,CACL,oBADK,EAEL,UAFK;;CApEJ;;ACAA,IAAMC,wBAAwB;UAC3B,gBAD2B;;SAG5B;eACM,CACT,qBADS;GAJsB;;UAS3B;eACK,CACT,0BADS;GAVsB;;kBAenB;eACH,CACT,CAAC,6CAAD,EAAgD,UAAhD,CADS;GAhBsB;;kBAqBnB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBsB;;WA2B1B;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAxCJ;;ACAA,IAAMC,qBAAqB;UACxB,aADwB;;SAGzB;eACM,CACT,IADS,EAET,aAFS;GAJmB;;UAUxB;eACK,CACT,oCADS;GAXmB;;kBAgBhB;eACH,CACT,CAAC,2BAAD,EAA8B,UAA9B,CADS,EAET,CAAC,mBAAD,EAAsB,OAAtB,CAFS;GAjBmB;;kBAuBhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS,EAET,CAAC,gCAAD,EAAmC,OAAnC,CAFS;GAxBmB;;WA8BvB;eACI,CACT,YADS,CADJ;;;;gBAOK;2BACW,QADX;2CAE2B;KAThC;;;;;WAeA,CACL,qBADK;;CA7CJ;;ACAA,IAAMC,wBAAwB;UAC3B,gBAD2B;;SAG5B;eACM,CACT,iBADS;GAJsB;;UAS3B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVsB;;kBAenB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhBsB;;OAqB9B;eACQ,CACT,0BADS;GAtBsB;;kBA2BnB;eACH,
CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BsB;;WAiC1B;eACI,CACT,CAAC,sBAAD,EAAyB,kBAAzB,CADS,EAET,kBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;;CA/CJ;;ACAA,IAAMC,iBAAiB;UACpB,QADoB;;SAGrB;eACM,CACT,6CADS;GAJe;;UASpB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVe;;kBAeZ;eACH,CACT,YADS;GAhBe;;kBAqBZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBe;;WA2BnB;eACI,CACT,CAAC,uBAAD,EAA0B,YAA1B,CADS,EAET,YAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,gBADK,EAEL,8BAFK;;CAzCJ;;ACAA,IAAMC,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM,CACT,iBADS;GAJyB;;UAS9B;eACK,CACT,6CADS;GAVyB;;kBAetB;eACH;;kBAAA,CADG;;cAMJ;GArB0B;;OAwBjC;eACQ,CACT,iBADS;GAzByB;;kBA8BtB;eACH,CACT,CAAC,8BAAD,EAAiC,MAAjC,CADS;GA/ByB;;WAoC7B;eACI,CACT,iBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAjDJ;;ACAA,IAAMC,yBAAyB;UAC5B,iBAD4B;;SAG7B;eACM,CACT,qBADS;GAJuB;;UAS5B;eACK,CACT,SADS;GAVuB;;kBAepB;eACH,CACT,CAAC,wCAAD,EAA2C,OAA3C,CADS;GAhBuB;;kBAqBpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBuB;;WA2B3B;eACI,CACT,eADS,CADJ;;;;gBAOK;2BACW;KARhB;;;;;WAcA,CACL,yBADK;;CAzCJ;;ACAA,IAAMC,uBAAuB;UAC1B,cAD0B;;SAG3B;eACM,CACT,UADS;GAJqB;;UAS1B;eACK,CACT,oBADS;GAVqB;;kBAelB;eACH,CACT,CAAC,wCAAD,EAA2C,OAA3C,CADS;GAhBqB;;kBAqBlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBqB;;WA2BzB;eACI,CACT,mCADS,CADJ;;;;gBAOK;uBACO;KARZ;;;;;WAcA;;CAzCJ;;ACAA,IAAMC,gCAAgC;UACnC,wBADmC;;SAGpC;eACM,CACT,gBADS;GAJ8B;;UASnC;eACK,CACT,sBADS;GAV8B;;kBAe3B;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS;GAhB8B;;kBAqB3B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtB8B;;WA2BlC;eACI,CACT,iBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAxCJ;;ACAA,IAAMC,qBAAqB;UACxB,aADwB;;SAGzB;eACM,CACT,iBADS;GAJmB;;UASxB;eACK,CACP,CAAC,qBAAD,EAAwB,OAAxB,CADO;GAVmB;;kBAehB;eACH,CACP,CAAC,qCAAD,EAAwC,OAAxC,CADO;GAhBmB;;OAqB3B;eACQ,CACT,QADS;GAtBmB;;kBA2BhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BmB;;WAiCvB;eACI,CACT,CAAC,sBAAD,EAAyB,kBAAzB,CADS,EAET,kBAFS,CADJ;;;;gBAQK;yCACyB,qCAACtS,KAAD,EAAW;YACtCuS,UAAUvS,MAAMwE,IAAN,EAAhB;cACMP,OAAN,CAAc,iBAAd,EAAiChK,IAAjC,CAAsC,kBAAtC,EAA0DuJ,WAA1D,CAAsE+O,OAAtE;OAHQ;;+BAMe;KAdpB;;;;;WAoBA;;CArDJ;;ACAA,IAAMC,qCAAqC;UACxC,6BADwC;;SAGzC;eACM,CACT,IADS,EAET,eAFS;GAJmC;;UAUxC;eACK,CACT,wCADS;GAXmC;;kBAgBhC;eACH,CACR,CAAC,qCAAD,EAAwC,OAAxC,CADQ,CADG;YAIN,6BAJM;cAKJ;GArBoC;;OAwB3C;eACQ,CACT,gBADS;GAzBmC;;kBA8BhC;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA/BmC;;WAoCvC;eACI,CACT,CAAC,iBAAD,EAAoB,iBAApB,CADS,EAET,UAFS,CADJ;;;;gBAQK;yBACS,uBAACxS,KAAD,EAAQN,CAAR,EAAc;YACzB+S,UAAUzS,MAAM/F,IAAN,CAAW,wBAAX,EACXA,IADW,CACN,cADM,EAEXwP,KAFW,GAGX8G,IAHW,CAGN,cAHM,CAAhB;YAIIkC,OAAJ,EAAa;gBACLtD,OAAN,CAAczP,wCAAsC+S,OAAtC,SAAd;;;KAfC;;;;;WAuBA,CACL,+BADK;;CA3DJ;;ACAA,IAAMC,oCAAoC;UACvC,4BADuC;;SAGxC;eACM,CACT,IADS,EAET,eAFS;GAJkC;;UAUvC;eACK,CACT,wCADS;GAXkC;;kBAgB/B;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAjBkC;;OAsB1C;eACQ,CACT,gBADS;GAvBkC;;kBA4B/B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BkC;;WAkCtC;eACI,CACT,CAAC,iBAAD,EAAoB,iBAApB,CADS,EAET,UAFS,CADJ;;;;gBAQK;yBACS,uBAAC1S,KAAD,EAAQN,CAAR,EAAc;YACzBiT,eAAe3S,MAAM6D,QAAN,GAAiB4F,KAAjB,EAArB;YACIkJ,aAAa9I,QAAb,CAAsB,YAAtB,CAAJ,EAAyC;cACjC+I,qBAAqBD,aAAa1Y,IAAb,CAAkB,2BAAlB,EAA+C4J,QAA/C,GAA0D4F,KAA1D,EAA3B;cACMoJ,WAAWD,mBAAmBrC,IAAnB,CAAwB,sBAAxB,CAAjB;cACMuC,WAAWF,mBAAmBrC,IAAnB,CAAwB,sBAAxB,CAAjB;cACIuC,YAAYD,QAAhB,EAA0B;kBAClB1D,OAAN,CAAczP,+DACEmT,QADF,uCAEEC,QAFF,+BAAd;;SALJ,MAUO;cACCL,UAAUzS,MAAM/F,IAAN,CAAW,wBAAX,EACbA,IADa,CACR,cADQ,EAEbwP,KAFa,GAGb8G,IAHa,CAGR,cAHQ,CAAhB;cAIIkC,OAAJ,EAAa;kBACLtD,OAAN,CAAczP,wCAAsC+S,OAAtC,SAAd;;;;KA3BD;;;;;WAoCA,CACL,+BADK;;CAtEJ;;ACAA,IAAMM,yBAAyB;UAC5B,iBAD4B;;SAG7B;eACM,CACT,YADS;GAJuB;;UAS5B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVuB;;kBAepB;eACH,CACT,CAAC
,gCAAD,EAAmC,OAAnC,CADS;GAhBuB;;kBAqBpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBuB;;WA2B3B;eACI,CACT,cADS,CADJ;;;;gBAOK;oBACI,mBAAC/S,KAAD,EAAW;YACjBgT,UAAUhT,MAAM/F,IAAN,CAAW,QAAX,CAAhB;cACMuJ,WAAN,CAAkBwP,OAAlB;;KAVG;;;;;WAiBA,CACL,YADK,EAEL,YAFK;;CA5CJ;;ACAA,IAAMC,sBAAsB;UACzB,aADyB;;oBAGf,CAChB,YADgB,CAHe;;SAO1B;eACM,CACT,MADS;GARoB;;UAazB;eACK,CACT,SADS;GAdoB;;kBAmBjB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GApBoB;;OAyB5B;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GA1BoB;;kBA+BjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAhCoB;;WAqCxB;eACI,CACT,CAAC,yBAAD,EAA4B,gBAA5B,CADS,EAET,gBAFS,CADJ;;;;gBAQK;iCACiB,QADjB;0BAEU;KAVf;;;;;WAgBA,CACL,gBADK;;CArDJ;;ACAA,IAAMC,kCAAkC;UACrC,yBADqC;;SAGtC;eACM,CACT,gBADS;GAJgC;;UASrC;eACK,CACT,6BADS;GAVgC;;kBAe7B;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhBgC;;kBAqB7B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBgC;;WA2BpC;eACI,CACT,gBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,CAAC,UAAD,CADK;;CAxCJ;;ACAA,IAAMC,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM,CACT,mBADS;GAJyB;;UAS9B;eACK,CACT,cADS;GAVyB;;kBAetB;eACH,CACT,CAAC,kCAAD,EAAqC,UAArC,CADS,CADG;cAIJ;GAnB0B;;OAsBjC;eACQ,CACT,kBADS;GAvByB;;kBA4BtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7ByB;;WAkC7B;eACI,CACT,UADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA/CJ;;ACAA,IAAMC,uBAAuB;UAC1B,eAD0B;;SAG3B;eACM,CACT,IADS,EAET,kBAFS;GAJqB;;UAU1B;eACK,CACT,SADS;GAXqB;;kBAgBlB;eACH,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAjBqB;;OAsB7B;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GAvBqB;;kBA4BlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BqB;;WAkCzB;eACI,CACT,iBADS,CADJ;;;;gBAOK;yBACS,sBAACpT,KAAD,EAAQN,CAAR,EAAc;mDACN0T,qBAAqBC,cAArB,CAAoChH,SAApC,CAA8C,CAA9C,CADM;YACxBzL,QADwB;YACdV,IADc;;YAEzBsP,MAAM9P,EAAEkB,QAAF,EAAYV,IAAZ,CAAiBA,IAAjB,CAAZ;YACIsP,GAAJ,EAAS;gBACDL,OAAN,gBAA2BK,GAA3B;;;KAZC;;;;;WAoBA;;CAtDJ;;ACAA,IAAM8D,qCAAqC;UACxC,6BADwC;;SAGzC;eACM,CACR,CAAC,6BAAD,EAAgC,OAAhC,CADQ;GAJmC;;UASxC;eACK,CACT,CAAC,8BAAD,EAAiC,OAAjC,CADS;GAVmC;;kBAehC;eACH,CACT,CAAC,4BAAD,EAA+B,OAA/B,CADS,CADG;cAIJ;GAnBoC;;OAsB3C;eACQ;;;GAvBmC;;kBA4BhC;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BmC;;WAkCvC;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA/CJ;;ACAA,IAAMC,6BAA6B;UAChC,qBADgC;;SAGjC;eACM,CACT,UADS,EAET,cAFS,EAGT,QAHS;GAJ2B;;UAWhC;eACK,CACT,oCADS;GAZ2B;;kBAiBxB;eACH,CACT,sBADS,CADG;cAIJ;GArB4B;;kBAwBxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAzB2B;;WA8B/B;eACI,CACT,2BADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA3CJ;;ACAA,IAAMC,wBAAwB;UAC3B,gBAD2B;;SAG5B;eACM,CACT,oBADS;GAJsB;;UAS3B;eACK,CACT,UADS,CADL;WAIC,CACL,iBADK,EAEL,UAFK;GAb0B;;kBAmBnB;eACH,CACT,YADS,CADG;cAIJ;;GAvBuB;;kBA2BnB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BsB;;WAiC1B;eACI,CACT,eADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA9CJ;;ACAA,IAAMC,6BAA6B;UAChC,qBADgC;;SAGjC;eACM,CACT,gBADS;GAJ2B;;UAShC;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAV2B;;kBAexB;eACH,CACT,CAAC,4BAAD,EAA+B,OAA/B,CADS;GAhB2B;;kBAqBxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtB2B;;WA2B/B;eACI,CACT,iBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,YADK,EAEL,aAFK,EAGL,aAHK,EAIL,oBAJK;;CAxCJ;;ACAA,IAAMC,sBAAsB;UACzB,cADyB;;SAG1B;eACM,CACT,UADS;GAJoB;;UASzB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVoB;;kBAejB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhBoB;;kBAqBjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBoB;;WA2BxB;eACI,CACT,0BADS,EAET,WAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;;CAzCJ;;ACAA,IAAMC,0BAA0B;UAC7B,kBAD6B;;SAG9B;eACM,CACT,eADS,EAET,YAFS;GAJwB;;UAU7B;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAXwB;;kBAgBrB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAjBwB;;kBAsBrB;eACH,CACP,CAAC,uBAAD,EAA0B,OAA1B,CADO;GAvBwB;;WA4B5B;eACI,CACT,UADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,mBADK,EAEL,YAFK,EAGL,YAHK;;CAzCJ;;A
CAA,IAAMC,uBAAuB;UAC1B,cAD0B;;SAG3B;eACM,CACT,gBADS;GAJqB;;UAS1B;eACK,CACT,SADS,EAET,QAFS;GAVqB;;kBAgBlB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAjBqB;;OAsB7B;eACQ,CACT,QADS;GAvBqB;;kBA4BlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BqB;;WAkCzB;eACI,CACT,mBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA/CJ;;ACAA,IAAMC,qBAAqB;UACxB,YADwB;;SAGzB;eACM,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAJmB;;UASxB;eACK,CACT,iBADS;GAVmB;;kBAehB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhBmB;;kBAqBhB;eACH,CACR,CAAC,uBAAD,EAA0B,OAA1B,CADQ;GAtBmB;;WA2BvB;eACI,CACT,yBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAxCJ;;ACAA,IAAMC,4BAA4B;UAC/B,oBAD+B;;SAGhC;eACM,CACT,WADS;GAJ0B;;UAS/B;eACK,CACT,kCADS;GAV0B;;kBAevB;cACJ,kBADI;;eAGH,CACT,6BADS;GAlB0B;;kBAuBvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxB0B;;WA6B9B;eACI,CACT,wBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,iBADK;;CA1CJ;;ACAA,IAAMC,8BAA8B;UACjC,sBADiC;;SAGlC;eACM,CACT,kBADS;GAJ4B;;UASjC;eACK,CACT,kCADS;GAV4B;;kBAezB;eACH,CACT,6BADS,CADG;;cAKJ;GApB6B;;OAuBpC;eACQ,CACT,sBADS;GAxB4B;;kBA6BzB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9B4B;;WAmChC;eACI,CACT,CAAC,iBAAD,EAAoB,kBAApB,CADS,EAET,kBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,iBADK;;CAjDJ;;ACAA,IAAMC,kCAAkC;UACrC,eADqC;;SAGtC;eACM,CACT,OADS,EAET,mBAFS;GAJgC;;UAUrC;eACK,CACT,SADS;GAXgC;;kBAgB7B;eACH,CACT,CAAC,sBAAD,EAAyB,gBAAzB,CADS;GAjBgC;;kBAsB7B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvBgC;;WA4BpC;eACI,CACT,sBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAzCJ;;ACAA,IAAMC,qBAAqB;UACxB,YADwB;;SAGzB;eACM,CACT,iBADS;GAJmB;;UASxB;eACK,CACT,uBADS;GAVmB;;kBAehB;eACH,CACR,CAAC,qCAAD,EAAwC,OAAxC,CADQ;GAhBmB;;kBAqBhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBmB;;WA2BvB;eACI,CACT,YADS,CADJ;;;;gBAOK;mBACG,QADH;oCAEoB;KATzB;;;;;WAeA;;CA1CJ;;ACAA,IAAMC,yBAAyB;UAC5B,iBAD4B;;SAG7B;eACM,CACT,mBADS;GAJuB;;UAS5B;eACK,CACT,uBADS;GAVuB;;kBAepB;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS;GAhBuB;;kBAqBpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBuB;;WA2B3B;eACI,CACT,CAAC,8DAAD,CADS,CADJ;;;;gBAOK;iCACiB,QADjB;iDAEiC;KATtC;;;;;WAeA;;CA1CJ;;ACAA,IAAMC,6BAA6B;UAChC,qBADgC;;SAGjC;eACM,CACT,UADS;GAJ2B;;kBASxB;eACH,CACT,kBADS,CADG;;cAKJ;GAd4B;;kBAiBxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAlB2B;;WAuB/B;eACI,CACT,wBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CApCJ;;ACAA,IAAMC,4BAA4B;UAC/B,oBAD+B;;SAGhC;eACM,CACT,UADS;GAJ0B;;UAS/B;eACK,CACT,cADS;GAV0B;;kBAevB;eACH,CACT,CAAC,4BAAD,EAA+B,OAA/B,CADS,CADG;;cAKJ;GApB2B;;kBAuBvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxB0B;;WA6B9B;eACI,CACT,CAAC,oBAAD,EAAuB,kBAAvB,CADS,EAET,kBAFS,EAGT,OAHS,CADJ;;;;gBASK;8BACc,4BAACpU,KAAD,EAAW;YAC3BuS,UAAUvS,MAAMwE,IAAN,EAAhB;cACMP,OAAN,CAAc,UAAd,EAA0BT,WAA1B,CAAsC+O,OAAtC;OAHQ;;wBAMQ,QANR;;yCAQyB,YARzB;;uBAUO;KAnBZ;;;;;WAyBA,CACL,cADK;;CAtDJ;;ACAA,IAAM8B,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM,CACT,IADS,EAET,UAFS;GAJyB;;UAU9B;eACK,CACT,aADS;GAXyB;;kBAgBtB;eACH,CACT,kBADS,CADG;;;YAMN,6BANM;;cAQJ;GAxB0B;;OA2BjC;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GA5ByB;;kBAiCtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAlCyB;;WAuC7B;eACI,CACT,UADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CApDJ;;ACAA,IAAMC,gCAAgC;UACnC,wBADmC;;SAGpC;eACM,CACT,IADS,EAET,eAFS;GAJ8B;;UAUnC;eACK,CACT,UADS;GAX8B;;kBAgB3B;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAjB8B;;OAsBtC;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAvB8B;;kBA4B3B;eACH,CACT,CAAC,cAAD,EAAiB,KAAjB,CADS;GA7B8B;;WAkClC;eACI,CACT,eADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,QADK,EAEL,YAFK;;CA/CJ;;ACAA,IAAMC,oBAAoB;UACvB,YADuB;;SAGxB;eACM,CACT,IADS,EAET,aAFS;GAJkB;;UAUvB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAXkB;;kBAgBf;eACH,CACT,YADS,CADG;;cAKJ;GArBmB;;OAwB1B;eACQ,CACT,eADS;GAzBkB;;kBA8Bf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA/BkB;;WAo
CtB;eACI,CACT,CAAC,GAAD,EAAM,mBAAN,EAA2B,kBAA3B,CADS,CADJ;;;;gBAOK;;gBAEA,kBAACvU,KAAD,EAAW;YACbiP,YAAYjP,MAAM6D,QAAN,EAAlB;YACIoL,UAAUzT,MAAV,KAAqB,CAArB,IAA0ByT,UAAU5Q,GAAV,CAAc,CAAd,EAAiB4E,OAAjB,KAA6B,KAA3D,EAAkE;iBACzD,QAAP;;;eAGK,IAAP;;KAfG;;;;;WAsBA,CACL,CACE,eADF,EAEE,kBAFF,EAGE,cAHF,EAIE,eAJF,CADK;;CA1DJ;;ACAA,IAAMuR,0BAA0B;UAC7B,kBAD6B;;SAG9B;eACM,CACT,aADS;GAJwB;;UAS7B;eACK,CACT,8BADS;GAVwB;;kBAerB;eACH,CACT,6BADS,CADG;;cAKJ;GApByB;;kBAuBrB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxBwB;;WA6B5B;eACI,CACT,eADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA1CJ;;ACAA,IAAMC,sBAAsB;UACzB,cADyB;;SAG1B;eACM,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAJoB;;UASzB;eACK,CACT,UADS;GAVoB;;kBAejB;eACH,CACT,MADS,CADG;;cAKJ;GApBqB;;OAuB5B;eACQ,CACT,cADS;GAxBoB;;kBA6BjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BoB;;WAmCxB;eACI,CACT,CAAC,oBAAD,EAAuB,oBAAvB,CADS,EAET,oBAFS,CADJ;;;;gBAQK;sBACM,qBAACzU,KAAD,EAAW;YACnB2E,OAAO3E,MAAM/F,IAAN,CAAW,KAAX,CAAb;aACKiG,IAAL,CAAU,OAAV,EAAmB,MAAnB;aACKA,IAAL,CAAU,QAAV,EAAoB,MAApB;aACKmF,QAAL,CAAc,gBAAd;cACM3C,MAAN,CAAa,eAAb,EAA8ByM,OAA9B,CAAsCxK,IAAtC;;KAdG;;;;;WAqBA;;CAxDJ;;ACAA,IAAM+P,6BAA6B;UAChC,qBADgC;;SAGjC;eACM,CACT,cADS;GAJ2B;;UAShC;eACK,CACT,SADS;GAV2B;;kBAexB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS,CADG;;cAKJ;GApB4B;;kBAuBxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxB2B;;WA6B/B;eACI,CACT,uBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA1CJ;;ACAA,IAAMC,uBAAuB;UAC1B,eAD0B;;SAG3B;eACM,CACT,mBADS;GAJqB;;UAS1B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVqB;;kBAelB;eACH,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAhBqB;;kBAqBlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtBqB;;WA2BzB;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,gBADK;;CAxCJ;;ACAA,IAAMC,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM,CACT,OADS;GAJyB;;UAS9B;eACK,CACT,kBADS;GAVyB;;kBAetB;eACH,CACT,yBADS,CADG;cAIJ;GAnB0B;;kBAsBtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvByB;;WA4B7B;eACI,CACT,aADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAzCJ;;ACAA,IAAMC,oBAAoB;UACvB,YADuB;;SAGxB;eACM,CACT,CAAC,oBAAD,EAAuB,OAAvB,CADS;GAJkB;;UASvB;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAVkB;;kBAef;eACH,CACT,CAAC,oCAAD,EAAuC,OAAvC,CADS,CADG;cAIJ;GAnBmB;;kBAsBf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvBkB;;WA4BtB;eACI,CACT,gBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAzCJ;;ACAA,IAAMC,iCAAiC;UACpC,yBADoC;;SAGrC;eACM,CACT,CAAC,4BAAD,EAA+B,OAA/B,CADS;GAJ+B;;UASpC;eACK,CACT,CAAC,oBAAD,EAAuB,OAAvB,CADS;GAV+B;;kBAe5B;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhB+B;;kBAqB5B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtB+B;;WA2BnC;eACI,CACT,CAAC,WAAD,EAAc,YAAd,CADS,EAET,YAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;;CAzCJ;;ACAA,IAAMC,mCAAmC;UACtC,2BADsC;;SAGvC;eACM,CACT,OADS,EAET,gBAFS;GAJiC;;UAUtC;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAXiC;;kBAgB9B;eACH,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAjBiC;;OAsBzC;eACQ,CACT,WADS;GAvBiC;;kBA4B9B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BiC;;WAkCrC;eACI,CACT,+BADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,kBADK;;CA/CJ;;ACAA,IAAMC,qBAAqB;UACxB,YADwB;;SAGzB;eACM,CACT,aADS,EAET,eAFS,EAGT,WAHS;GAJmB;;UAWxB;eACK,CACT,0BADS;GAZmB;;kBAiBhB;eACH,CACP,CAAC,iBAAD,EAAoB,UAApB,CADO;GAlBmB;;OAuB3B;eACQ;;;GAxBmB;;kBA6BhB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BmB;;WAmCvB;eACI,CACT,CAAC,sBAAD,EAAyB,kBAAzB,CADS,EAET,kBAFS,CADJ;;;;gBAQK;gCACgB;KATrB;;;;;WAeA;;CAlDJ;;ACAA,IAAMC,4BAA4B;UAC/B,oBAD+B;;SAGhC;eACM,CACT,QADS,EAET,CAAC,oBAAD,EAAuB,OAAvB,CAFS;GAJ0B;;UAU/B;eACK,CACT,SADS;GAX0B;;kBAgBvB;eACH,CACT,CAAC,2BAAD,EAA8B,OAA9B,CADS;GAjB0B;;OAsBlC;eACQ;;;GAvB0B;;kBA4BvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7B0B;;WAkC9B;eACI,CACT,CAAC,kBAAD,EAAqB,QAArB,EAA+B,OAA/B,CADS,EAET,OAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,wBADK,E
AEL,sBAFK;;CAhDJ;;ACAA,IAAMC,yBAAyB;UAC5B,gBAD4B;;SAG7B;eACM,CACT,cADS,EAET,iBAFS,EAGT,kBAHS;GAJuB;;UAW5B;eACK,CACT,eADS,EAET,qBAFS;GAZuB;;kBAkBpB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAnBuB;;OAwB/B;eACQ;;;GAzBuB;;kBA8BpB;eACH,CACT,CAAC,gCAAD,EAAmC,KAAnC,CADS;GA/BuB;;WAoC3B;eACI,CACT,4BADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAjDJ;;ACAA,IAAMC,2BAA2B;UAC9B,kBAD8B;;SAG/B;eACM,CACT,QADS;GAJyB;;UAS9B;eACK,CACT,cADS;GAVyB;;kBAetB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAhByB;;kBAqBtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtByB;;WA2B7B;eACI,CACT,gBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAxCJ;;ACAA,IAAMC,6BAA6B;UAChC,oBADgC;;SAGjC;eACM,CACT,UADS,EAET,CAAC,uBAAD,EAA0B,OAA1B,CAFS;GAJ2B;;UAUhC;eACK,CACT,2GADS,EAET,gBAFS;GAX2B;;kBAiBxB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAlB2B;;kBAuBxB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxB2B;;WA6B/B;eACI,CACT,aADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,UADK;;CA1CJ;;ACAA,IAAMC,oBAAoB;UACvB,YADuB;;SAGxB;eACM,CACT,CAAC,oBAAD,EAAuB,OAAvB,CADS;GAJkB;;UASvB;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAVkB;;kBAef;eACH,CACT,CAAC,oCAAD,EAAuC,OAAvC,CADS,CADG;;cAKJ;GApBmB;;kBAuBf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxBkB;;WA6BtB;eACI,CACT,gBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA1CJ;;ACAA,IAAMC,2BAA2B;UAC9B,mBAD8B;;SAG/B;eACM,CACT,qCADS;GAJyB;;UAS9B;eACK,CACT,2BADS;GAVyB;;kBAetB;eACH,CACT,CAAC,4BAAD,EAA+B,OAA/B,CADS;GAhByB;;kBAqBtB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAtByB;;WA2B7B;eACI,CACT,eADS,EAET,iBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,gBADK,EAEL,yBAFK,EAGL,yBAHK;;CAzCJ;;ACAA,IAAMC,yBAAyB;UAC5B,iBAD4B;;SAG7B;eACM,CACT,oBADS;GAJuB;;UAS5B;eACK,CACT,oBADS;GAVuB;;kBAepB;eACH,CACT,CAAC,wDAAD,EAA2D,UAA3D,CADS,EAET,4BAFS,CADG;;cAMJ;GArBwB;;kBAwBpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAzBuB;;WA8B3B;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CA3CJ;;ACAA,IAAMC,sBAAsB;UACzB,aADyB;;SAG1B;eACM,CACT,IADS;GAJoB;;UASzB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVoB;;kBAejB;eACH,CACT,WADS,CADG;;cAKJ;GApBqB;;kBAuBjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxBoB;;WA6BxB;eACI,CACT,CAAC,SAAD,EAAY,aAAZ,CADS,EAET,aAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;;CA3CJ;;ACAA,IAAMC,0BAA0B;UAC7B,kBAD6B;;SAG9B;eACM,CACT,gBADS,EAET,IAFS;GAJwB;;UAU7B;eACK,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS,EAET,4BAFS;GAXwB;;kBAiBrB;eACH,CACT,CAAC,gCAAD,EAAmC,UAAnC,CADS,CADG;;cAKJ;GAtByB;;OAyBhC;eACQ;;;GA1BwB;;kBA+BrB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAhCwB;;WAqC5B;eACI,CACT,CAAC,eAAD,EAAkB,QAAlB,CADS,EAET,QAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,eADK;;CAnDJ;;ACAA,IAAMC,sCAAsC;UACzC,8BADyC;;oBAG/B,CAChB,gBADgB,CAH+B;;SAO1C;eACM,CACT,IADS,EAET,kBAFS;GARoC;;UAczC;eACK,CACT,mBADS,EAET,wBAFS;GAfoC;;kBAqBjC;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAtBoC;;OA2B5C;eACQ,CACT,kCADS;GA5BoC;;kBAiCjC;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAlCoC;;WAuCxC;oBACS,KADT;;eAGI,CACT,mBADS,EAET,8BAFS,CAHJ;;;;gBAUK,EAVL;;;;;WAgBA,CACL,kBADK,EAEL,qBAFK;;CAvDJ;;ACAA,IAAMC,+BAA+B;UAClC,uBADkC;;SAGnC;eACM,CACT,UADS;GAJ6B;;UASlC;eACK,CACT,sBADS;GAV6B;;kBAe1B;eACH,CACT,CAAC,2BAAD,EAA8B,OAA9B,CADS;GAhB6B;;OAqBrC;eACQ;;;GAtB6B;;kBA2B1B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5B6B;;WAiCjC;eACI,CACT,kBADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,sBADK;;CA9CJ;;ACAA,IAAMC,mCAAmC;UACtC,4BADsC;;SAGvC;eACM,CACT,aADS;GAJiC;;UAStC;eACK,CACT,sBADS;GAViC;;kBAe9B;eACH,CACT,YADS,CADG;;cAKJ;GApBkC;;OAuBzC;eACQ,CACT,gBADS;GAxBiC;;kBA6B9B;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9BiC;;WAmCrC;eACI;;2BAAA,CADJ;;;;gBAQK,EARL;;;;;WAcA;;CAjDJ;;ACAA,IAAMC,sBAAsB;UACzB,aADyB;;SAG1B;eACM,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAJoB;;UASzB;eACK,CACT,cADS,EAET,CAAC,8BAAD,EAAiC,OAAjC,CAFS;GAVoB;;kBAgBjB;eACH,CACT,CAAC,mBAAD,
EAAsB,OAAtB,CADS;GAjBoB;;OAsB5B;eACQ;;;GAvBoB;;kBA4BjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7BoB;;WAkCxB;eACI,CACT,CAAC,uBAAD,EAA0B,cAA1B,CADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,aADK,EAEL,UAFK,EAGL,WAHK;;CA/CJ;;ACAA,IAAMC,wBAAwB;UAC3B,gBAD2B;;SAG5B;eACM,CACT,IADS,EAET,UAFS;GAJsB;;UAU3B;eACK,CACT,OADS;GAXsB;;kBAgBnB;eACH,CACT,CAAC,kBAAD,EAAqB,iBAArB,CADS;GAjBsB;;OAsB9B;eACQ,CACT,UADS;GAvBsB;;kBA4BnB;eACH,CACT,CAAC,wBAAD,EAA2B,KAA3B,CADS;GA7BsB;;WAkC1B;eACI,CACT,SADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,WADK,EAEL,UAFK,EAGL,WAHK;;CA/CJ;;ACAA,IAAMC,wBAAwB;UAC3B,eAD2B;;oBAGjB,CAChB,gBADgB,EAEhB,WAFgB,EAGhB,WAHgB,EAIhB,iBAJgB,EAKhB,WALgB,CAHiB;;SAW5B;eACM,CACT,IADS,EAET,kBAFS;GAZsB;;UAkB3B;eACK,CACT,SADS;GAnBsB;;kBAwBnB;eACH,CACT,MADS,EAET,gBAFS,CADG;;cAMJ;GA9BuB;;OAiC9B;eACQ,CACT,IADS;GAlCsB;;kBAuCnB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxCsB;;WA6C1B;eACI,CACT,aADS,CADJ;;;;gBAOK;wBACQ,QADR;uBAEO,YAFP;wBAGQ,QAHR;uBAIO,YAJP;yBAKS,QALT;wBAMQ;KAbb;;;;;WAmBA,CACL,gBADK,EAEL,gBAFK,EAGL,iBAHK,EAIL,cAJK;;CAhEJ;;ACAA,IAAMC,sBAAsB;UACzB,cADyB;;SAG1B;eACM,CACT,IADS;GAJoB;;UASzB;eACK,CACT,6BADS;GAVoB;;kBAejB;eACH,CACT,CAAC,mBAAD,EAAsB,OAAtB,CADS;GAhBoB;;OAqB5B;eACQ,CACT,WADS;GAtBoB;;kBA2BjB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA5BoB;;WAiCxB;eACI,CACT,kBADS,CADJ;;;;gBAOK;sBACM,oBAAChW,KAAD,EAAW;YACnBwP,MAAMxP,MAAME,IAAN,CAAW,KAAX,CAAZ;cACM0F,MAAN,GAAepC,WAAf,wBAAgDgM,GAAhD;OAHQ;kBAKE;KAZP;;;;;WAkBA,CACL,QADK;;CAnDJ;;ACAA,IAAMyG,uBAAuB;UAC1B,eAD0B;;SAG3B;eACM,CACT,MADS,EAET,IAFS;GAJqB;;UAU1B;eACK,CACT,eADS;GAXqB;;kBAgBlB;eACH,CACT,WADS,CADG;;cAKJ;GArBsB;;OAwB7B;eACQ,CACT,MADS;GAzBqB;;kBA8BlB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA/BqB;;WAoCzB;eACI,CACT,OADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA,CACL,mBADK,EAEL,YAFK,EAGL,8BAHK,EAIL,cAJK;;CAjDJ;;ACAA,IAAMC,4BAA4B;UAC/B,qBAD+B;;SAGhC;eACM,CACT,IADS;GAJ0B;;UAS/B;eACK,CACT,CAAC,yBAAD,EAA4B,OAA5B,CADS;GAV0B;;kBAevB;eACH,CACT,CAAC,8BAAD,EAAiC,OAAjC,CADS,CADG;;cAKJ;GApB2B;;OAuBlC;eACQ,CACT,wBADS;GAxB0B;;kBA6BvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA9B0B;;WAmC9B;eACI,CACT,CAAC,uBAAD,EAA0B,qBAA1B,CADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;;CAhDJ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACGP,iBAAe,aAAYC,gBAAZ,EAA8B/Z,MAA9B,CAAqC,UAACC,GAAD,EAAMkI,GAAN,EAAc;MAC1DmK,YAAYyH,iBAAiB5R,GAAjB,CAAlB;sBAEKlI,GADL,EAEKwS,sBAAsBH,SAAtB,CAFL;CAFa,EAMZ,EANY,CAAf;;ACHA;AACA,AAAO,IAAM0H,kBAAkB,wCAAxB;;;;AAIP,AAAO,IAAMC,eAAe,IAAI/b,MAAJ,CAAW,aAAX,EAA0B,GAA1B,CAArB;;;;;;;;;;AAUP,AAAO;;;;;;;AAQP,AAAO;;;AAKP,AAAO,IAAMgc,iBAAiB,WAAvB;AACP,AAAO,IAAMC,kBAAkB,WAAxB;AACP,AAAO,IAAMC,uBAAuB,4BAA7B;AACP,AAAO,IAAMC,yBAAyB,oBAA/B;AACP,AAAO,IAAMC,wBAAwB,QAA9B;AACP,IAAMC,SAAS,CACb,KADa,EAEb,KAFa,EAGb,KAHa,EAIb,KAJa,EAKb,KALa,EAMb,KANa,EAOb,KAPa,EAQb,KARa,EASb,KATa,EAUb,KAVa,EAWb,KAXa,EAYb,KAZa,CAAf;AAcA,IAAMC,YAAYD,OAAOha,IAAP,CAAY,GAAZ,CAAlB;AACA,IAAMka,aAAa,qCAAnB;AACA,IAAMC,aAAa,wCAAnB;AACA,IAAMC,aAAa,cAAnB;AACA,AAAO,IAAMC,oBACX,IAAI1c,MAAJ,OAAeuc,UAAf,WAA+BC,UAA/B,WAA+CC,UAA/C,wBAA4EH,SAA5E,QAA0F,IAA1F,CADK;;;;AAKP,AAAO,IAAMK,sBAAsB,WAA5B;;;;;AAKP,AAAO,IAAMC,qBAAqB,gBAA3B;;AAEP,AAAO,IAAMC,oBACX,IAAI7c,MAAJ,CAAW,2BAAX,EAAwC,GAAxC,CADK;;AC5DP;;AAEA,AAAe,SAAS8c,WAAT,CAAqBC,MAArB,EAA6B;SACnC5d,gBACL4d,OAAO1d,OAAP,CAAeyc,eAAf,EAAgC,IAAhC,EAAsCxc,IAAtC,EADK,CAAP;;;ACJa,SAASgU,OAAT,CAAe0J,YAAf,EAA6B;iBAC3BA,aAAa1d,IAAb,EAAf;MACI2d,SAASC,QAAT,CAAkBF,YAAlB,CAAJ,EAAqC;WAC5BA,YAAP;;;SAGK,IAAP;;;ACAF;;AAEA,AAAe,SAASG,QAAT,CAAkBC,GAAlB,QAAuC;MAAdhY,CAAc,QAAdA,CAAc;MAAXiY,OAAW,QAAXA,OAAW;;;MAEhDD,IAAIlc,MAAJ,GAAa,IAAb,IAAqBkc,IAAIlc,MAAJ,GAAa,CAAtC,EAAyC,OAAO,IAAP;;;MAGr
Cmc,WAAW7a,eAAe6a,OAAf,EAAwB,EAAxB,MAAgC7a,eAAe4a,GAAf,EAAoB,EAApB,CAA/C,EAAwE,OAAO,IAAP;;MAElEE,UAAU5L,UAAU0L,GAAV,EAAehY,CAAf,CAAhB;;;;MAII2W,aAAalc,IAAb,CAAkByd,OAAlB,CAAJ,EAAgC,OAAO,IAAP;;SAEzBne,gBAAgBme,QAAQhe,IAAR,EAAhB,CAAP;;;ACrBF;;;;AAIA,AAUA,AAAO,SAASie,eAAT,CAAyBC,UAAzB,EAAqC;SACnC,CAACA,WAAWhd,KAAX,CAAiBkc,iBAAjB,KAAuC,EAAxC,EACWra,IADX,CACgB,GADhB,EAEWhD,OAFX,CAEmB+c,qBAFnB,EAE0C,GAF1C,EAGW/c,OAHX,CAGmB8c,sBAHnB,EAG2C,UAH3C,EAIW9c,OAJX,CAImB6c,oBAJnB,EAIyC,IAJzC,EAKW5c,IALX,EAAP;;;AAQF,AAAO,SAASme,UAAT,CAAoBD,UAApB,EAAgCE,QAAhC,EAA0CC,MAA1C,EAAkD;MACnDhB,oBAAoB9c,IAApB,CAAyB2d,UAAzB,CAAJ,EAA0C;WACjCI,OAAO,IAAIC,IAAJ,CAASL,UAAT,CAAP,CAAP;;;SAGKE,WACLE,OAAOE,EAAP,CAAUN,UAAV,EAAsBG,UAAUI,YAAYP,UAAZ,CAAhC,EAAyDE,QAAzD,CADK,GAELE,OAAOJ,UAAP,EAAmBG,UAAUI,YAAYP,UAAZ,CAA7B,CAFF;;;;;AAOF,AAAe,SAASQ,kBAAT,CAA4BR,UAA5B,EAAmE;iFAAJ,EAAI;MAAzBE,QAAyB,QAAzBA,QAAyB;MAAfC,MAAe,QAAfA,MAAe;;;MAE5E3B,eAAenc,IAAf,CAAoB2d,UAApB,KAAmCvB,gBAAgBpc,IAAhB,CAAqB2d,UAArB,CAAvC,EAAyE;WAChE,IAAIK,IAAJ,CAASnd,SAAS8c,UAAT,EAAqB,EAArB,CAAT,EAAmCS,WAAnC,EAAP;;;MAGEC,OAAOT,WAAWD,UAAX,EAAuBE,QAAvB,EAAiCC,MAAjC,CAAX;;MAEI,CAACO,KAAKC,OAAL,EAAL,EAAqB;iBACNZ,gBAAgBC,UAAhB,CAAb;WACOC,WAAWD,UAAX,EAAuBE,QAAvB,EAAiCC,MAAjC,CAAP;;;SAGKO,KAAKC,OAAL,KAAiBD,KAAKD,WAAL,EAAjB,GAAsC,IAA7C;;;ACrCF;AACA,AAAe,SAASG,gBAAT,CACbvT,OADa,QASb;MANEzF,CAMF,QANEA,CAMF;mCALEiZ,kBAKF;MALEA,kBAKF,yCALuB,IAKvB;wBAJEpO,KAIF;MAJEA,KAIF,8BAJU,EAIV;sBAHEzQ,GAGF;MAHEA,GAGF,4BAHQ,EAGR;iCAFE8e,cAEF;MAFEA,cAEF,uCAFmB,IAEnB;;;;qBAGgBzT,OAAhB,EAAyBzF,CAAzB;;;;;MAKIkZ,cAAJ,EAAoB7T,YAAYI,OAAZ,EAAqBzF,CAArB;;;uBAGFyF,OAAlB,EAA2BzF,CAA3B,EAA8B5F,GAA9B;;;;;aAKWqL,OAAX,EAAoBzF,CAApB,EAAuB5F,GAAvB;;;;gBAIcqL,OAAd,EAAuBzF,CAAvB;;;;;gBAKWyF,OAAX,EAAoBzF,CAApB;;;eAGayF,OAAb,EAAsBzF,CAAtB,EAAyB6K,KAAzB;;;;;;MAMIqO,cAAJ,EAAoBvO,aAAUlF,OAAV,EAAmBzF,CAAnB,EAAsBiZ,kBAAtB;;;cAGRxT,OAAZ,EAAqBzF,CAArB;;;qBAGgByF,OAAhB,EAAyBzF,CAAzB;;SAEOyF,OAAP;;;AC3Da,SAAS0T,aAAT,CAAoBtO,KAApB,QAAuC;MAAVzQ,GAAU,QAAVA,GAAU;MAAL4F,CAAK,QAALA,CAAK;;;;MAGhDwX,mBAAmB/c,IAAnB,CAAwBoQ,KAAxB,CAAJ,EAAoC;YAC1BuO,kBAAkBvO,KAAlB,EAAyBzQ,GAAzB,CAAR;;;;;MAKEyQ,MAAM/O,MAAN,GAAe,GAAnB,EAAwB;;QAEhBud,KAAKrZ,EAAE,IAAF,CAAX;QACIqZ,GAAGvd,MAAH,KAAc,CAAlB,EAAqB;cACXud,GAAGrf,IAAH,EAAR;;;;;SAKGD,gBAAgBuS,UAAUzB,KAAV,EAAiB7K,CAAjB,EAAoB9F,IAApB,EAAhB,CAAP;;;AChBF,SAASof,sBAAT,CAAgCC,UAAhC,EAA4Cvf,IAA5C,EAAkD;;;;MAI5Cuf,WAAWzd,MAAX,IAAqB,CAAzB,EAA4B;;;;;UAIpB0d,aAAaD,WAAW7c,MAAX,CAAkB,UAACC,GAAD,EAAM8c,SAAN,EAAoB;YACnDA,SAAJ,IAAiB9c,IAAI8c,SAAJ,IAAiB9c,IAAI8c,SAAJ,IAAiB,CAAlC,GAAsC,CAAvD;eACO9c,GAAP;OAFiB,EAGhB,EAHgB,CAAnB;;kCAME,iBAAgB6c,UAAhB,EACQ9c,MADR,CACe,UAACC,GAAD,EAAMkI,GAAN,EAAc;YAChBlI,IAAI,CAAJ,IAAS6c,WAAW3U,GAAX,CAAb,EAA8B;iBACrB,CAACA,GAAD,EAAM2U,WAAW3U,GAAX,CAAN,CAAP;;;eAGKlI,GAAP;OANT,EAOU,CAAC,CAAD,EAAI,CAAJ,CAPV,CAVwB;;UASnB+c,OATmB;UASVC,SATU;;;;;;;;UAuBtBA,aAAa,CAAb,IAAkBD,QAAQ5d,MAAR,IAAkB,CAAxC,EAA2C;qBAC5B9B,KAAKwB,KAAL,CAAWke,OAAX,CAAb;;;UAGIE,YAAY,CAACL,WAAW,CAAX,CAAD,EAAgBA,WAAWhc,KAAX,CAAiB,CAAC,CAAlB,CAAhB,CAAlB;UACMsc,aAAaD,UAAUld,MAAV,CAAiB,UAACC,GAAD,EAAMqB,GAAN;eAAcrB,IAAIb,MAAJ,GAAakC,IAAIlC,MAAjB,GAA0Ba,GAA1B,GAAgCqB,GAA9C;OAAjB,EAAoE,EAApE,CAAnB;;UAEI6b,WAAW/d,MAAX,GAAoB,EAAxB,EAA4B;;aACnB+d;;;;;WAGF7f;;;;;;;SAGF,IAAP;;;AAGF,SAAS8f,oBAAT,CAA8BP,UAA9B,EAA0Cnf,GAA1C,EAA+C;;;;;;;mBAO5B+B,IAAIC,KAAJ,CAAUhC,GAAV,CAP4B;MAOrCkC,IAPqC,cAOrCA,IAPqC;;MAQvCyd,cAAczd,KAAKrC,OAAL,CAAawd,iBAAb,EAAgC,EAAhC,CAApB;;MAEMuC,YAAYT,WAAW,CAAX,EAAcxd,WAAd,GAA4B9B,OAA5B,CAAoC,GAApC,EAAyC,EAAzC,CAAlB;MACMggB,iBAAiBC,MAAMC,WAAN,CAAkBH,SAAlB,EAA6BD,WAA7B,CAAvB;;MAEIE,iBAAiB,GAAjB,IAAwBD,UAAUle,MAAV,
…(remainder of the previous, corejs2-era source map's machine-generated base64 VLQ "mappings" string truncated)…"}
\ No newline at end of file
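
Because the regenerated map below is a single machine-generated line, it is easier to sanity-check programmatically than by eye. A minimal sketch — not part of this diff, and assuming the map is emitted alongside the bundle as dist/mercury.js.map rather than inlined — that parses the new map and confirms it still references the original src/ modules:

// Sketch only: verify the regenerated (corejs3-era) source map still
// points back at the original src/ files. The dist/mercury.js.map
// path is an assumption; adjust it if the build inlines the map.
const fs = require('fs');

const map = JSON.parse(fs.readFileSync('dist/mercury.js.map', 'utf8'));

console.log(map.version); // 3
console.log(map.file); // 'mercury.js'
console.log(map.sources.length); // number of bundled src/ modules
console.log(map.sources.slice(0, 2)); // e.g. '../src/utils/text/normalize-spaces.js', ...
console.log(map.sourcesContent.length === map.sources.length); // true when every source is embedded
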
+{"version":3,"file":"mercury.js","sources":["../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/page-num-from-url.js","../src/utils/text/remove-anchor.js","../src/utils/text/article-base-url.js","../src/utils/text/has-sentence-end.js","../src/utils/text/excerpt-content.js","../src/utils/text/get-encoding.js","../src/resource/utils/constants.js","../src/resource/utils/fetch-resource.js","../src/resource/utils/dom/normalize-meta-tags.js","../src/utils/dom/constants.js","../src/utils/dom/strip-unlikely-candidates.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/mark-to-keep.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-h-ones.js","../src/utils/dom/clean-attributes.js","../src/utils/dom/remove-empty.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/find-top-candidate.js","../src/utils/dom/clean-tags.js","../src/utils/dom/clean-headers.js","../src/utils/dom/rewrite-top-level.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-meta.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/is-wordpress.js","../src/utils/dom/get-attrs.js","../src/utils/dom/set-attr.js","../src/utils/dom/set-attrs.js","../src/resource/utils/dom/constants.js","../src/resource/utils/dom/convert-lazy-loaded-images.js","../src/resource/utils/dom/clean.js","../src/resource/index.js","../src/utils/range.js","../src/utils/validate-url.js","../src/utils/merge-supported-domains.js","../src/extractors/add-extractor.js","../src/extractors/custom/blogspot.com/index.js","../src/extractors/custom/nymag.com/index.js","../src/extractors/custom/wikipedia.org/index.js","../src/extractors/custom/twitter.com/index.js","../src/extractors/custom/www.nytimes.com/index.js","../src/extractors/custom/www.theatlantic.com/index.js","../src/extractors/custom/www.newyorker.com/index.js","../src/extractors/custom/www.wired.com/index.js","../src/extractors/custom/www.msn.com/index.js","../src/extractors/custom/www.yahoo.com/index.js","../src/extractors/custom/www.buzzfeed.com/index.js","../src/extractors/custom/fandom.wikia.com/index.js","../src/extractors/custom/www.littlethings.com/index.js","../src/extractors/custom/www.politico.com/index.js","../src/extractors/custom/deadspin.com/index.js","../src/extractors/custom/www.broadwayworld.com/index.js","../src/extractors/custom/www.apartmenttherapy.com/index.js","../src/extractors/custom/medium.com/index.js","..
/src/extractors/custom/www.tmz.com/index.js","../src/extractors/custom/www.washingtonpost.com/index.js","../src/extractors/custom/www.huffingtonpost.com/index.js","../src/extractors/custom/newrepublic.com/index.js","../src/extractors/custom/money.cnn.com/index.js","../src/extractors/custom/www.theverge.com/index.js","../src/extractors/custom/www.cnn.com/index.js","../src/extractors/custom/www.aol.com/index.js","../src/extractors/custom/www.youtube.com/index.js","../src/extractors/custom/www.theguardian.com/index.js","../src/extractors/custom/www.sbnation.com/index.js","../src/extractors/custom/www.bloomberg.com/index.js","../src/extractors/custom/www.bustle.com/index.js","../src/extractors/custom/www.npr.org/index.js","../src/extractors/custom/www.recode.net/index.js","../src/extractors/custom/qz.com/index.js","../src/extractors/custom/www.dmagazine.com/index.js","../src/extractors/custom/www.reuters.com/index.js","../src/extractors/custom/mashable.com/index.js","../src/extractors/custom/www.chicagotribune.com/index.js","../src/extractors/custom/www.vox.com/index.js","../src/extractors/custom/news.nationalgeographic.com/index.js","../src/extractors/custom/www.nationalgeographic.com/index.js","../src/extractors/custom/www.latimes.com/index.js","../src/extractors/custom/pagesix.com/index.js","../src/extractors/custom/thefederalistpapers.org/index.js","../src/extractors/custom/www.cbssports.com/index.js","../src/extractors/custom/www.msnbc.com/index.js","../src/extractors/custom/www.thepoliticalinsider.com/index.js","../src/extractors/custom/www.mentalfloss.com/index.js","../src/extractors/custom/abcnews.go.com/index.js","../src/extractors/custom/www.nydailynews.com/index.js","../src/extractors/custom/www.cnbc.com/index.js","../src/extractors/custom/www.popsugar.com/index.js","../src/extractors/custom/observer.com/index.js","../src/extractors/custom/people.com/index.js","../src/extractors/custom/www.usmagazine.com/index.js","../src/extractors/custom/www.rollingstone.com/index.js","../src/extractors/custom/247sports.com/index.js","../src/extractors/custom/uproxx.com/index.js","../src/extractors/custom/www.eonline.com/index.js","../src/extractors/custom/www.miamiherald.com/index.js","../src/extractors/custom/www.refinery29.com/index.js","../src/extractors/custom/www.macrumors.com/index.js","../src/extractors/custom/www.androidcentral.com/index.js","../src/extractors/custom/www.si.com/index.js","../src/extractors/custom/www.rawstory.com/index.js","../src/extractors/custom/www.cnet.com/index.js","../src/extractors/custom/www.cinemablend.com/index.js","../src/extractors/custom/www.today.com/index.js","../src/extractors/custom/www.howtogeek.com/index.js","../src/extractors/custom/www.al.com/index.js","../src/extractors/custom/www.thepennyhoarder.com/index.js","../src/extractors/custom/www.westernjournalism.com/index.js","../src/extractors/custom/fusion.net/index.js","../src/extractors/custom/www.americanow.com/index.js","../src/extractors/custom/sciencefly.com/index.js","../src/extractors/custom/hellogiggles.com/index.js","../src/extractors/custom/thoughtcatalog.com/index.js","../src/extractors/custom/www.nj.com/index.js","../src/extractors/custom/www.inquisitr.com/index.js","../src/extractors/custom/www.nbcnews.com/index.js","../src/extractors/custom/fortune.com/index.js","../src/extractors/custom/www.linkedin.com/index.js","../src/extractors/custom/obamawhitehouse.archives.gov/index.js","../src/extractors/custom/www.opposingviews.com/index.js","../src/extractors/custom/www.prospectmagazine.co.uk/i
ndex.js","../src/extractors/custom/forward.com/index.js","../src/extractors/custom/www.qdaily.com/index.js","../src/extractors/custom/gothamist.com/index.js","../src/extractors/custom/www.fool.com/index.js","../src/extractors/custom/www.slate.com/index.js","../src/extractors/custom/ici.radio-canada.ca/index.js","../src/extractors/custom/www.fortinet.com/index.js","../src/extractors/custom/www.fastcompany.com/index.js","../src/extractors/custom/blisterreview.com/index.js","../src/extractors/custom/news.mynavi.jp/index.js","../src/extractors/custom/clinicaltrials.gov/index.js","../src/extractors/custom/github.com/index.js","../src/extractors/custom/www.reddit.com/index.js","../src/extractors/custom/otrs.com/index.js","../src/extractors/custom/www.ossnews.jp/index.js","../src/extractors/custom/buzzap.jp/index.js","../src/extractors/custom/www.asahi.com/index.js","../src/extractors/custom/www.sanwa.co.jp/index.js","../src/extractors/custom/www.elecom.co.jp/index.js","../src/extractors/custom/scan.netsecurity.ne.jp/index.js","../src/extractors/custom/jvndb.jvn.jp/index.js","../src/extractors/custom/genius.com/index.js","../src/extractors/custom/www.jnsa.org/index.js","../src/extractors/custom/phpspot.org/index.js","../src/extractors/custom/www.infoq.com/index.js","../src/extractors/custom/www.moongift.jp/index.js","../src/extractors/custom/www.itmedia.co.jp/index.js","../src/extractors/custom/www.publickey1.jp/index.js","../src/extractors/custom/takagi-hiromitsu.jp/index.js","../src/extractors/custom/bookwalker.jp/index.js","../src/extractors/custom/www.yomiuri.co.jp/index.js","../src/extractors/custom/japan.cnet.com/index.js","../src/extractors/custom/deadline.com/index.js","../src/extractors/custom/www.gizmodo.jp/index.js","../src/extractors/custom/getnews.jp/index.js","../src/extractors/custom/www.lifehacker.jp/index.js","../src/extractors/custom/sect.iij.ad.jp/index.js","../src/extractors/custom/www.oreilly.co.jp/index.js","../src/extractors/custom/www.ipa.go.jp/index.js","../src/extractors/custom/weekly.ascii.jp/index.js","../src/extractors/custom/techlog.iij.ad.jp/index.js","../src/extractors/custom/wired.jp/index.js","../src/extractors/custom/japan.zdnet.com/index.js","../src/extractors/custom/www.rbbtoday.com/index.js","../src/extractors/custom/www.lemonde.fr/index.js","../src/extractors/custom/www.phoronix.com/index.js","../src/extractors/custom/pitchfork.com/index.js","../src/extractors/custom/biorxiv.org/index.js","../src/extractors/custom/epaper.zeit.de/index.js","../src/extractors/custom/www.ladbible.com/index.js","../src/extractors/custom/timesofindia.indiatimes.com/index.js","../src/extractors/custom/ma.ttias.be/index.js","../src/extractors/custom/pastebin.com/index.js","../src/extractors/custom/www.abendblatt.de/index.js","../src/extractors/custom/www.gruene.de/index.js","../src/extractors/custom/www.engadget.com/index.js","../src/extractors/custom/arstechnica.com/index.js","../src/extractors/custom/www.ndtv.com/index.js","../src/extractors/custom/www.spektrum.de/index.js","../src/extractors/all.js","../src/cleaners/constants.js","../src/cleaners/author.js","../src/cleaners/lead-image-url.js","../src/cleaners/dek.js","../src/cleaners/date-published.js","../src/cleaners/content.js","../src/cleaners/title.js","../src/cleaners/resolve-split-title.js","../src/cleaners/index.js","../src/extractors/generic/content/extract-best-node.js","../src/extractors/generic/content/extractor.js","../src/extractors/generic/title/constants.js","../src/extractors/generic/title/extractor.js","../src/e
xtractors/generic/author/constants.js","../src/extractors/generic/author/extractor.js","../src/extractors/generic/date-published/constants.js","../src/extractors/generic/date-published/extractor.js","../src/extractors/generic/dek/extractor.js","../src/extractors/generic/lead-image-url/constants.js","../src/extractors/generic/lead-image-url/score-image.js","../src/extractors/generic/lead-image-url/extractor.js","../src/extractors/generic/next-page-url/scoring/utils/score-similarity.js","../src/extractors/generic/next-page-url/scoring/utils/score-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-page-in-link.js","../src/extractors/generic/next-page-url/scoring/constants.js","../src/extractors/generic/next-page-url/scoring/utils/score-extraneous-links.js","../src/extractors/generic/next-page-url/scoring/utils/score-by-parents.js","../src/extractors/generic/next-page-url/scoring/utils/score-prev-link.js","../src/extractors/generic/next-page-url/scoring/utils/should-score.js","../src/extractors/generic/next-page-url/scoring/utils/score-base-url.js","../src/extractors/generic/next-page-url/scoring/utils/score-next-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-cap-links.js","../src/extractors/generic/next-page-url/scoring/score-links.js","../src/extractors/generic/next-page-url/extractor.js","../src/extractors/generic/url/constants.js","../src/extractors/generic/url/extractor.js","../src/extractors/generic/excerpt/constants.js","../src/extractors/generic/excerpt/extractor.js","../src/extractors/generic/word-count/extractor.js","../src/extractors/generic/index.js","../src/extractors/detect-by-html.js","../src/extractors/get-extractor.js","../src/extractors/root-extractor.js","../src/extractors/collect-all-pages.js","../src/mercury.js"],"sourcesContent":["const NORMALIZE_RE = /\\s{2,}(?![^<>]*<\\/(pre|code|textarea)>)/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp(\n '(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})',\n 'i'\n);\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n\nexport const ENCODING_RE = /charset=([\\w-]+)\\b/;\nexport const DEFAULT_ENCODING = 'utf-8';\n","import { PAGE_IN_HREF_RE } from './constants';\n\nexport default function pageNumFromUrl(url) {\n const matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n const pageNum = parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? 
pageNum : null;\n}\n","export default function removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n","import URL from 'url';\n\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n let goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path\n .split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n","export default function excerptContent(content, words = 10) {\n return content\n .trim()\n .split(/\\s+/)\n .slice(0, words)\n .join(' ');\n}\n","import iconv from 'iconv-lite';\nimport { DEFAULT_ENCODING, ENCODING_RE } from './constants';\n\n// check a string for encoding; this is\n// used in our fetchResource function to\n// ensure correctly encoded responses\nexport default function getEncoding(str) {\n let encoding = DEFAULT_ENCODING;\n const matches = ENCODING_RE.exec(str);\n if (matches !== null) {\n [, str] = matches;\n }\n if (iconv.encodingExists(str)) {\n encoding = str;\n }\n return encoding;\n}\n","import cheerio from 'cheerio';\n\n// Browser does not like us setting user agent\nexport const REQUEST_HEADERS = cheerio.browser\n ? 
{}\n : {\n 'User-Agent':\n 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',\n };\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nexport const FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nconst BAD_CONTENT_TYPES = [\n 'audio/mpeg',\n 'image/gif',\n 'image/jpeg',\n 'image/jpg',\n];\n\nexport const BAD_CONTENT_TYPES_RE = new RegExp(\n `^(${BAD_CONTENT_TYPES.join('|')})$`,\n 'i'\n);\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. Defaults to 5 MB.\nexport const MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\nexport const PROXY_DOMAINS = false;\nexport const REQUESTS_PROXIES = {\n http: 'http://38.98.105.139:33333',\n https: 'http://38.98.105.139:33333',\n};\n\nexport const DOMAINS_TO_PROXY = ['nih.gov', 'gutenberg.org'];\n","import URL from 'url';\nimport request from 'postman-request';\n\nimport {\n REQUEST_HEADERS,\n FETCH_TIMEOUT,\n BAD_CONTENT_TYPES_RE,\n MAX_CONTENT_LENGTH,\n} from './constants';\n\nfunction get(options) {\n return new Promise((resolve, reject) => {\n request(options, (err, response, body) => {\n if (err) {\n reject(err);\n } else {\n resolve({ body, response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 or not.\n// Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nexport function validateResponse(response, parseNon200 = false) {\n // Check if we got a valid status code\n // This isn't great, but I'm requiring a statusMessage to be set\n // before short circuiting b/c nock doesn't set it in tests\n // statusMessage only not set in nock response, in which case\n // I check statusCode, which is currently only 200 for OK responses\n // in tests\n if (\n (response.statusMessage && response.statusMessage !== 'OK') ||\n response.statusCode !== 200\n ) {\n if (!response.statusCode) {\n throw new Error(\n `Unable to fetch content. Original exception was ${response.error}`\n );\n } else if (!parseNon200) {\n throw new Error(\n `Resource returned a response status code of ${\n response.statusCode\n } and resource was instructed to reject non-200 status codes.`\n );\n }\n }\n\n const {\n 'content-type': contentType,\n 'content-length': contentLength,\n } = response.headers;\n\n // Check that the content is not in BAD_CONTENT_TYPES\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error(\n `Content-type for this resource was ${contentType} and is not allowed.`\n );\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error(\n `Content for this resource was too large. Maximum content length is ${MAX_CONTENT_LENGTH}.`\n );\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\nexport function baseDomain({ host }) {\n return host\n .split('.')\n .slice(-2)\n .join('.');\n}\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nexport default async function fetchResource(url, parsedUrl, headers = {}) {\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n const options = {\n url: parsedUrl.href,\n headers: { ...REQUEST_HEADERS, ...headers },\n timeout: FETCH_TIMEOUT,\n // Accept cookies\n jar: true,\n // Set to null so the response returns as binary and body as buffer\n // https://github.com/request/request#requestoptions-callback\n encoding: null,\n // Accept and decode gzip\n gzip: true,\n // Follow any non-GET redirects\n followAllRedirects: true,\n ...(typeof window !== 'undefined'\n ? {}\n : {\n // Follow GET redirects; this option is for Node only\n followRedirect: true,\n }),\n };\n\n const { response, body } = await get(options);\n\n try {\n validateResponse(response);\n return {\n body,\n response,\n };\n } catch (e) {\n return {\n error: true,\n message: e.message,\n };\n }\n}\n","function convertMetaProp($, from, to) {\n $(`meta[${from}]`).each((_, node) => {\n const $node = $(node);\n\n const value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. See, e.g., og or twitter meta tags.\n\nexport default function normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n","// Spacer images to be removed\nexport const SPACER_RE = new RegExp('transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nexport const KEEP_CLASS = 'mercury-parser-keep';\n\nexport const KEEP_SELECTORS = [\n 'iframe[src^=\"https://www.youtube.com\"]',\n 'iframe[src^=\"https://www.youtube-nocookie.com\"]',\n 'iframe[src^=\"http://www.youtube.com\"]',\n 'iframe[src^=\"https://player.vimeo\"]',\n 'iframe[src^=\"http://player.vimeo\"]',\n 'iframe[src^=\"https://www.redditmedia.com\"]',\n];\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(\n selector => `[${selector}]`\n);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = [\n 'src',\n 'srcset',\n 'sizes',\n 'type',\n 'href',\n 'class',\n 'id',\n 'alt',\n 'xlink:href',\n 'width',\n 'height',\n];\n\nexport const WHITELIST_ATTRS_RE = new RegExp(\n `^(${WHITELIST_ATTRS.join('|')})$`,\n 'i'\n);\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(\n tag => `${tag}:empty`\n).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = [\n 'ul',\n 'ol',\n 'table',\n 'div',\n 'button',\n 'form',\n].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE = new RegExp(\n `^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`,\n 'i'\n);\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = ['figure', 'photo', 'image', 'caption'];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(\n POSITIVE_SCORE_HINTS.join('|'),\n 'i'\n);\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(\n NEGATIVE_SCORE_HINTS.join('|'),\n 'i'\n);\n\n// XPath to try to determine if a page is wordpress. Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(\n EXTRANEOUS_LINK_HINTS.join('|'),\n 'i'\n);\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^|]|$)|»([^|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(\n `^(${BLOCK_LEVEL_TAGS.join('|')})$`,\n 'i'\n);\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(\n `!(${candidatesWhitelist})|(${candidatesBlacklist})`,\n 'i'\n);\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import { CANDIDATES_WHITELIST, CANDIDATES_BLACKLIST } from './constants';\n\nexport default function stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*')\n .not('a')\n .each((index, node) => {\n const $node = $(node);\n const classes = $node.attr('class');\n const id = $node.attr('id');\n if (!id && !classes) return;\n\n const classAndId = `${classes || ''} ${id || ''}`;\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n }\n if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I 
don't love it. - AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const $element = $(element);\n const nextElement = $element.next().get(0);\n\n if (nextElement && nextElement.tagName.toLowerCase() === 'br') {\n collapsing = true;\n $element.remove();\n } else if (collapsing) {\n collapsing = false;\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (\n sibling &&\n !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))\n ) {\n const { nextSibling } = sibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertible = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertible) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertible = $span.parents('p, div, li, figcaption').length === 0;\n if (convertible) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const attrs = getAttrs(node) || {};\n\n const attribString = Reflect.ownKeys(attrs)\n .map(key => `${key}=${attrs[key]}`)\n .join(' ');\n let html;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html =\n node.tagName.toLowerCase() === 'noscript' ? 
import { getAttrs } from 'utils/dom';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const attrs = getAttrs(node) || {};\n\n const attribString = Reflect.ownKeys(attrs)\n .map(key => `${key}=${attrs[key]}`)\n .join(' ');\n let html;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html =\n node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();\n } else {\n html = $node.contents();\n }\n $node.replaceWith(`<${tag} ${attribString}>${html}</${tag}>`);\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import URL from 'url';\n\nimport { KEEP_SELECTORS, KEEP_CLASS } from './constants';\n\nexport default function markToKeep(article, $, url, tags = []) {\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n const { protocol, hostname } = URL.parse(url);\n tags = [...tags, `iframe[src^=\"${protocol}//${hostname}\"]`];\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return $;\n}\n","import { STRIP_OUTPUT_TAGS, KEEP_CLASS } from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article)\n .not(`.${KEEP_CLASS}`)\n .remove();\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there are fewer than 3 of them (<3),\n// strip them. Otherwise, turn 'em into H2s.\nexport default function cleanHOnes(article, $) {\n const $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each((index, node) => $(node).remove());\n } else {\n $hOnes.each((index, node) => {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n","import { getAttrs, setAttrs } from 'utils/dom';\n\nimport { WHITELIST_ATTRS_RE, KEEP_CLASS } from './constants';\n\nfunction removeAllButWhitelist($article, $) {\n $article.find('*').each((index, node) => {\n const attrs = getAttrs(node);\n\n setAttrs(\n node,\n Reflect.ownKeys(attrs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: attrs[attr] };\n }\n\n return acc;\n }, {})\n );\n });\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, $article).removeClass(KEEP_CLASS);\n\n return $article;\n}\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article, $) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ? $article.parent() : $article,\n $\n );\n}\n","
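// Sketch of cleanImages above on invented markup:
//
//   <article>
//     <img src="pixel.gif" width="1" height="1" />
//     <img src="photo.jpg" width="800" height="600" />
//   </article>
//
// The 1x1 image is removed as a probable shim or icon; the photo survives
// but loses its height attribute so it can scale by width alone:
//
//   <article><img src="photo.jpg" width="800" /></article>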
export default function removeEmpty($article, $) {\n $article.find('p').each((index, p) => {\n const $p = $(p);\n if ($p.find('iframe, img').length === 0 && $p.text().trim() === '')\n $p.remove();\n });\n\n return $;\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside a <div />, should cause it to NOT\n// be turned into a paragraph tag. Shallow <div /> tags without these elements\n// should be turned into <p> tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE = new RegExp(\n `^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`,\n 'i'\n);\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = ['figure', 'photo', 'image', 'caption'];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(\n POSITIVE_SCORE_HINTS.join('|'),\n 'i'\n);\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(\n NEGATIVE_SCORE_HINTS.join('|'),\n 'i'\n);\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive <br> tags\nexport const BR_TAGS_RE = new RegExp('(<br[^>]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp('<br[^>]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(\n `^(${BLOCK_LEVEL_TAGS.join('|')})$`,\n 'i'\n);\n\n// The removal is implemented as a blacklist and whitelist; this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression, both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(\n `!(${candidatesWhitelist})|(${candidatesBlacklist})`,\n 'i'\n);\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) {\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","
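// Worked example for getWeight above (class/id values invented): a node
// with id="entry" gets +25 from POSITIVE_SCORE_RE; its
// class="share-tools figure" is then skipped for the +/-25 checks because
// the id already contributed, but 'figure' still earns the +10 photo
// bonus, for a total of 35.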
import { scoreCommas, scoreLength } from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import { getOrInitScore, setScore } from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import { getScore, scoreNode, getWeight, addToParent } from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n }\n if (tagName.toLowerCase() === 'div') {\n return 5;\n }\n if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n }\n if (BAD_TAGS.test(tagName)) {\n return -3;\n }\n if (tagName.toLowerCase() === 'th') {\n return -5;\n }\n\n return 0;\n}\n","
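// Worked example for scoreParagraph above: a 130-character <p> with two
// commas scores 1 (base) + 2 (one point per comma) +
// min(max(130 / 50 - 2, 0), 3) = 1 + 2 + 0.6 = 3.6; if the text ended
// with a colon it would drop to 2.6.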
import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport { scoreNode, setScore, getOrInitScore, addScore } from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre')\n .not('[score]')\n .each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","import { textLength, linkDensity } from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('<div></div>');\n\n $candidate\n .parent()\n .children()\n .each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling.get(0) === $candidate.get(0)) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n }\n if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n }\n if (\n siblingContentLength <= 80 &&\n density === 0 &&\n hasSentenceEnd(siblingContent)\n ) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n if (\n wrappingDiv.children().length === 1 &&\n wrappingDiv\n .children()\n .first()\n .get(0) === $candidate.get(0)\n ) {\n return $candidate;\n }\n\n return wrappingDiv;\n}\n","
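// Worked example for the sibling threshold above: with topScore = 100 the
// threshold is max(10, 100 * 0.25) = 25, so a sibling scoring 15 would
// normally be dropped; but if it shares the candidate's class it gains a
// 100 * 0.2 = 20 bonus (15 + 20 = 35 >= 25) and is merged in (assuming a
// middling link density, so no density bonus or penalty applies).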
import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\nimport mergeSiblings from './merge-siblings';\n\n// After we've calculated scores, loop through all of the possible\n// candidate nodes we found and find the one with the highest score.\nexport default function findTopCandidate($) {\n let $candidate;\n let topScore = 0;\n\n $('[score]').each((index, node) => {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {\n return;\n }\n\n const $node = $(node);\n const score = getScore($node);\n\n if (score > topScore) {\n topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS, KEEP_CLASS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > pCount / 3) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName.toLowerCase();\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (\n previousNode &&\n normalizeSpaces(previousNode.text()).slice(-1) === ':'\n ) {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag names to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n // If marked to keep, skip it\n if ($node.hasClass(KEEP_CLASS) || $node.find(`.${KEEP_CLASS}`).length > 0)\n return;\n\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // determine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","import { getWeight } from 'extractors/generic/content/scoring';\n\nimport { HEADER_TAG_LIST } from './constants';\nimport { normalizeSpaces } from '../text';\n\nexport default function cleanHeaders($article, $, title = '') {\n $(HEADER_TAG_LIST, $article).each((index, header) => {\n const $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\nexport default function rewriteTopLevel(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo($('html'), $, 'div');\n $ = convertNodeTo($('body'), $, 'div');\n\n return $;\n}\n","import URL from 'url';\n\nimport { getAttrs, setAttr } from 'utils/dom';\n\nfunction absolutize($, rootUrl, attr) {\n const baseUrl = $('base').attr('href');\n\n $(`[${attr}]`).each((_, node) => {\n const attrs = getAttrs(node);\n const url = attrs[attr];\n if (!url) return;\n const absoluteUrl = URL.resolve(baseUrl || rootUrl, url);\n\n setAttr(node, attr, absoluteUrl);\n });\n}\n\nfunction absolutizeSet($, rootUrl, $content) {\n $('[srcset]', $content).each((_, node) => {\n const attrs = getAttrs(node);\n const urlSet = attrs.srcset;\n\n if (urlSet) {\n // a comma should be considered part of the candidate URL unless preceded by a descriptor\n // descriptors can only contain positive numbers followed immediately by either 'w' or 'x'\n // space characters inside the URL should be encoded (%20 or +)\n const candidates = urlSet.match(\n /(?:\\s*)(\\S+(?:\\s*[\\d.]+[wx])?)(?:\\s*,\\s*)?/g\n );\n if (!candidates) return;\n const absoluteCandidates = candidates.map(candidate => {\n // a candidate URL cannot start or end with a comma\n // descriptors are separated from the URLs by unescaped whitespace\n const parts = candidate\n .trim()\n .replace(/,$/, '')\n .split(/\\s+/);\n parts[0] = URL.resolve(rootUrl, parts[0]);\n return parts.join(' ');\n });\n const absoluteUrlSet = [...new Set(absoluteCandidates)].join(', ');\n setAttr(node, 'srcset', absoluteUrlSet);\n }\n });\n}\n
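// Sketch of the srcset handling above, for a document at
// https://example.com/post/ (URLs invented):
//
//   srcset="/img/a.jpg 1x, img/b.jpg 2x"
//
// each candidate URL is resolved against the page URL, keeping its
// descriptor:
//
//   srcset="https://example.com/img/a.jpg 1x, https://example.com/post/img/b.jpg 2x"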
\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr));\n absolutizeSet($, url, $content);\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim().replace(/\\s+/g, ' ').length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n }\n if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { stripTags } from 'utils/dom';\n\n// Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\nexport default function extractFromMeta(\n $,\n metaNames,\n cachedNames,\n cleanTags = true\n) {\n const foundNames = metaNames.filter(name => cachedNames.indexOf(name) !== -1);\n\n // eslint-disable-next-line no-restricted-syntax\n for (const name of foundNames) {\n const type = 'name';\n const value = 'value';\n\n const nodes = $(`meta[${type}=\"${name}\"]`);\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n const values = nodes\n .map((index, node) => $(node).attr(value))\n .toArray()\n .filter(text => text !== '');\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. Skip this name\n // also.\n if (values.length === 1) {\n let metaValue;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags) {\n metaValue = stripTags(values[0], $);\n } else {\n [metaValue] = values;\n }\n\n return metaValue;\n }\n }\n\n // If nothing is found, return null\n return null;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a list of selectors, find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n // eslint-disable-next-line no-restricted-syntax\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","
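// Worked example for linkDensity above:
//
//   <div>Read <a href="#">more</a> here</div>
//
// the total text "Read more here" has length 14 and the link text "more"
// has length 4, so linkDensity = 4 / 14 ≈ 0.29.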
// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`<span>${text}</span>`).text();\n return cleanText === '' ? text : cleanText;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find(parent => {\n const attrs = getAttrs(parent);\n const { class: nodeClass, id } = attrs;\n const classAndId = `${nodeClass} ${id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","import { IS_WP_SELECTOR } from './constants';\n\nexport default function isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n","export default function getAttrs(node) {\n const { attribs, attributes } = node;\n\n if (!attribs && attributes) {\n const attrs = Reflect.ownKeys(attributes).reduce((acc, index) => {\n const attr = attributes[index];\n\n if (!attr.name || !attr.value) return acc;\n\n acc[attr.name] = attr.value;\n return acc;\n }, {});\n return attrs;\n }\n\n return attribs;\n}\n","export default function setAttr(node, attr, val) {\n if (node.attribs) {\n node.attribs[attr] = val;\n } else if (node.attributes) {\n node.setAttribute(attr, val);\n }\n\n return node;\n}\n","export default function setAttrs(node, attrs) {\n if (node.attribs) {\n node.attribs = attrs;\n } else if (node.attributes) {\n while (node.attributes.length > 0) {\n node.removeAttribute(node.attributes[0].name);\n }\n\n Reflect.ownKeys(attrs).forEach(key => {\n node.setAttribute(key, attrs[key]);\n });\n }\n\n return node;\n}\n","export const IS_LINK = new RegExp('https?://', 'i');\nconst IMAGE_RE = '.(png|gif|jpe?g)';\nexport const IS_IMAGE = new RegExp(`${IMAGE_RE}`, 'i');\nexport const IS_SRCSET = new RegExp(\n `${IMAGE_RE}(\\\\?\\\\S+)?(\\\\s*[\\\\d.]+[wx])`,\n 'i'\n);\n\nexport const TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');\n","
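// Sketch for convertLazyLoadedImages below (the attribute name is
// invented; many lazy-loaders stash the real source in a data-* attribute
// like this):
//
//   <img data-lazy-src="https://example.com/photo.jpg">
//
// the value matches IS_LINK and IS_IMAGE, so it is copied into src and
// the image loads normally:
//
//   <img data-lazy-src="https://example.com/photo.jpg" src="https://example.com/photo.jpg">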
import { getAttrs } from 'utils/dom';\n\nimport { IS_LINK, IS_IMAGE, IS_SRCSET } from './constants';\n\n// Convert img tags with potentially lazy-loaded sources into normal images.\n// Many sites will have img tags with no source, or an img tag with a src\n// attribute that is a placeholder. We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nexport default function convertLazyLoadedImages($) {\n $('img').each((_, img) => {\n const attrs = getAttrs(img);\n\n Reflect.ownKeys(attrs).forEach(attr => {\n const value = attrs[attr];\n\n if (attr !== 'srcset' && IS_LINK.test(value) && IS_SRCSET.test(value)) {\n $(img).attr('srcset', value);\n } else if (\n attr !== 'src' &&\n attr !== 'srcset' &&\n IS_LINK.test(value) &&\n IS_IMAGE.test(value)\n ) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n","import { TAGS_TO_REMOVE } from './constants';\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root()\n .find('*')\n .contents()\n .filter(isComment)\n .remove();\n\n return $;\n}\n\nexport default function clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n","import cheerio from 'cheerio';\nimport iconv from 'iconv-lite';\n\nimport { getEncoding } from 'utils/text';\nimport { fetchResource } from './utils';\nimport { normalizeMetaTags, convertLazyLoadedImages, clean } from './utils/dom';\n\nconst Resource = {\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. Expects a\n // string.\n // :param headers: Custom headers to be included in the request\n async create(url, preparedResponse, parsedUrl, headers = {}) {\n let result;\n\n if (preparedResponse) {\n const validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500,\n },\n };\n\n result = {\n body: preparedResponse,\n response: validResponse,\n alreadyDecoded: true,\n };\n } else {\n result = await fetchResource(url, parsedUrl, headers);\n }\n\n if (result.error) {\n result.failed = true;\n return result;\n }\n\n return this.generateDoc(result);\n },\n\n generateDoc({ body: content, response, alreadyDecoded = false }) {\n const { 'content-type': contentType = '' } = response.headers;\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n if (!contentType.includes('html') && !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n let $ = this.encodeDoc({ content, contentType, alreadyDecoded });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n },\n\n encodeDoc({ content, contentType, alreadyDecoded = false }) {\n if (alreadyDecoded) {\n return cheerio.load(content);\n }\n\n const encoding = getEncoding(contentType);\n let decodedContent = iconv.decode(content, encoding);\n let $ = cheerio.load(decodedContent);\n // after first cheerio.load, check to see if encoding matches\n const contentTypeSelector = cheerio.browser\n ? 
'meta[http-equiv=content-type]'\n : 'meta[http-equiv=content-type i]';\n const metaContentType =\n $(contentTypeSelector).attr('content') ||\n $('meta[charset]').attr('charset');\n const properEncoding = getEncoding(metaContentType);\n\n // if encodings in the header/body don't match, use the one in the body\n if (metaContentType && properEncoding !== encoding) {\n decodedContent = iconv.decode(content, properEncoding);\n $ = cheerio.load(decodedContent);\n }\n\n return $;\n },\n};\n\nexport default Resource;\n","export default function* range(start = 1, end = 1) {\n while (start <= end) {\n yield start;\n start += 1;\n }\n}\n","// extremely simple url validation as a first step\nexport default function validateUrl({ hostname }) {\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n","const merge = (extractor, domains) =>\n domains.reduce((acc, domain) => {\n acc[domain] = extractor;\n return acc;\n }, {});\n\nexport default function mergeSupportedDomains(extractor) {\n return extractor.supportedDomains\n ? merge(extractor, [extractor.domain, ...extractor.supportedDomains])\n : merge(extractor, [extractor.domain]);\n}\n","import mergeSupportedDomains from '../utils/merge-supported-domains';\n\nexport const apiExtractors = {};\n\nexport default function addExtractor(extractor) {\n if (!extractor || !extractor.domain) {\n return {\n error: true,\n message: 'Unable to add custom extractor. Invalid parameters.',\n };\n }\n\n Object.assign(apiExtractors, mergeSupportedDomains(extractor));\n\n return apiExtractors;\n}\n","export const BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: ['.post-content noscript'],\n\n // Selectors to remove from the extracted content\n clean: [],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div',\n },\n },\n\n author: {\n selectors: ['.post-author-name'],\n },\n\n title: {\n selectors: ['.post h2.title'],\n },\n\n date_published: {\n selectors: ['span.publishdate'],\n },\n};\n","
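// Sketch of registering a custom extractor via addExtractor above (domain
// and selectors invented; the shape mirrors the bundled extractors below):
//
//   addExtractor({
//     domain: 'blog.example.com',
//     title: { selectors: ['h1.post-title'] },
//     author: { selectors: [['meta[name="author"]', 'value']] },
//     content: { selectors: ['div.post-body'], clean: ['.ad'] },
//   });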
export const NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: ['div.article-content', 'section.body', 'article.article'],\n\n // Selectors to remove from the extracted content\n clean: ['.ad', '.single-related-story'],\n\n // Object of transformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.)\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: ($node, $) => {\n const $children = $.browser ? $($node.text()) : $node.children();\n if (\n $children.length === 1 &&\n $children.get(0) !== undefined &&\n $children.get(0).tagName.toLowerCase() === 'img'\n ) {\n return 'figure';\n }\n\n return null;\n },\n },\n },\n\n title: {\n selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1'],\n },\n\n author: {\n selectors: ['.by-authors', '.lede-feature-author'],\n },\n\n dek: {\n selectors: ['.lede-feature-teaser'],\n },\n\n date_published: {\n selectors: [\n ['time.article-timestamp[datetime]', 'datetime'],\n 'time.article-timestamp',\n ],\n },\n};\n","export const WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: ['#mw-content-text'],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': $node => {\n const $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure',\n },\n\n // Selectors to remove from the extracted content\n clean: [\n '.mw-editsection',\n 'figure tr, figure td, figure tbody',\n '#toc',\n '.navbox',\n ],\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: ['h2.title'],\n },\n\n date_published: {\n selectors: ['#footer-info-lastmod'],\n },\n};\n","export const TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': ($node, $) => {\n const tweets = $node.find('.tweet');\n const $tweetContainer = $('<div></div>');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ mentions with <s> tags, which\n // render as a strikethrough\n s: 'span',\n },\n\n selectors: ['.permalink[role=main]'],\n\n defaultCleaner: false,\n\n clean: ['.stream-item-footer', 'button', '.tweet-details-fixer'],\n },\n\n author: {\n selectors: ['.tweet.permalink-tweet .username'],\n },\n\n date_published: {\n selectors: [['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms']],\n },\n};\n","export const NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: [\n 'h1.g-headline',\n 'h1[itemprop=\"headline\"]',\n 'h1.headline',\n 'h1 .balancedHeadline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n '.g-byline',\n '.byline',\n ['meta[name=\"byl\"]', 'value'],\n ],\n },\n\n content: {\n selectors: ['div.g-blocks', 'section[name=\"articleBody\"]', 'article#story'],\n\n transforms: {\n 'img.g-lazy': $node => {\n let src = $node.attr('src');\n const width = 640;\n\n src = src.replace('{{size}}', width);\n $node.attr('src', src);\n },\n },\n\n clean: [\n '.ad',\n 'header#story-header',\n '.story-body-1 .lede.video',\n '.visually-hidden',\n '#newsletter-promo',\n '.promo',\n '.comments-button',\n '.hidden',\n '.comments',\n '.supplemental',\n '.nocontent',\n '.story-footer-links',\n ],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\nexport const TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: ['h1', 
'.c-article-header__hed'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value'], '.c-byline__author'],\n },\n\n content: {\n selectors: ['article', '.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.partner-box',\n '.callout',\n '.c-article-writer__image',\n '.c-article-writer__content',\n '.c-letters-cta__text',\n '.c-footer__logo',\n '.c-recirculation-link',\n '.twitter-tweet',\n ],\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']],\n },\n\n date_published: {\n selectors: [['time[itemprop=\"datePublished\"]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [['img[itemprop=\"url\"]', 'src']],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: [\n 'h1[class^=\"content-header\"]',\n 'h1[class^=\"ArticleHeader__hed\"]',\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n 'div[class^=\"ArticleContributors\"] a[rel=\"author\"]',\n 'article header div[class*=\"Byline__multipleContributors\"]',\n ],\n },\n\n content: {\n selectors: [\n 'article.article.main-content',\n 'main[class^=\"Layout__content\"]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['footer[class^=\"ArticleFooter__footer\"]'],\n },\n\n date_published: {\n selectors: [\n 'time.content-header__publish-date',\n ['meta[name=\"pubdate\"]', 'value'],\n ],\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: ['div.content-header__dek', 'h2[class^=\"ArticleHeader__dek\"]'],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: [\n 'h1.content-header__hed',\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n 'a[rel=\"author\"]',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'article.article.main-content',\n 'article.content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.visually-hidden', 'figcaption img.photo'],\n },\n\n date_published: {\n selectors: [\n 'time.content-header__publish-date',\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: [\n 'h1',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.authorname-txt',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'div.richtext',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['span.caption'],\n },\n\n date_published: {\n selectors: ['span.time'],\n },\n\n lead_image_url: {\n selectors: [],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: [\n 'header.canvas-header',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.provider-name',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.figure-caption'],\n },\n\n date_published: {\n selectors: [['time.date[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter dek selectors\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: [\n 'h1[id=\"post-title\"]',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[data-action=\"user/username\"]',\n 'byline__author',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n ['.longform_custom_header_media', '#buzz_sub_buzz'],\n '#buzz_sub_buzz',\n ],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n h2: 'b',\n\n 'div.longform_custom_header_media': $node => {\n if ($node.has('img') && $node.has('.longform_header_image_source')) {\n return 'figure';\n }\n\n return null;\n },\n\n 'figure.longform_custom_header_media .longform_header_image_source':\n 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.instapaper_ignore',\n '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline',\n '.share-box',\n '.print',\n ],\n },\n\n date_published: {\n selectors: ['.buzz-datetime'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: [\n 'h1.entry-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n '.author vcard',\n '.fn',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '.grid-content',\n '.entry-content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro',\n '.content-wrapper',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: ['.story-main-content .byline .vcard'],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content',\n '.content-group',\n '.story-core',\n '.story-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['figcaption'],\n },\n\n date_published: {\n selectors: [['.story-main-content .timestamp time[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","export const DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: [\n 'jezebel.com',\n 'lifehacker.com',\n 'kotaku.com',\n 'gizmodo.com',\n 'jalopnik.com',\n 'kinja.com',\n 'avclub.com',\n 'clickhole.com',\n 'splinternews.com',\n 'theonion.com',\n 'theroot.com',\n 'thetakeout.com',\n 'theinventory.com',\n ],\n\n title: {\n selectors: ['h1.headline'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n content: {\n selectors: ['.post-content', '.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': $node => {\n const youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.magnifier', '.lightbox'],\n },\n\n date_published: {\n selectors: [['time.updated[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: ['h1.article-title'],\n },\n\n author: {\n selectors: ['span[itemprop=author]'],\n },\n\n content: {\n selectors: ['div[itemprop=articlebody]'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n\n date_published: {\n selectors: [['meta[itemprop=datePublished]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: ['h1.headline'],\n },\n\n author: {\n selectors: ['.PostByline__name'],\n },\n\n content: {\n selectors: ['div.post__content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': ($node, $) => {\n const data = JSON.parse($node.attr('data-props'));\n const { src } = data.sources[0];\n const $img = $('<img />').attr('src', src);\n $node.replaceWith($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n\n date_published: {\n selectors: [['.PostByline__timestamp[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: {\n selectors: [],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const MediumExtractor = {\n domain: 'medium.com',\n\n title: {\n selectors: ['h1', ['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n content: {\n selectors: ['article'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: $node => {\n const ytRe = /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n const thumb = decodeURIComponent($node.attr('data-thumbnail'));\n const $parent = $node.parents('figure');\n\n if (ytRe.test(thumb)) {\n const [_, youtubeId] = thumb.match(ytRe); // eslint-disable-line\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n const $caption = $parent.find('figcaption');\n $parent.empty().append([$node, $caption]);\n return;\n }\n\n // If we can't draw the YouTube preview, remove the figure.\n $parent.remove();\n },\n\n // rewrite figures to pull out image and caption, remove rest\n figure: $node => {\n // ignore if figure has an iframe\n if ($node.find('iframe').length > 0) return;\n\n const $img = $node.find('img').slice(-1)[0];\n const $caption = $node.find('figcaption');\n\n $node.empty().append([$img, $caption]);\n },\n\n // Remove any smaller images that did not get caught by the generic image\n // cleaner (author photo 48px, leading sentence images 79px, etc.).\n img: $node => {\n const width = parseInt($node.attr('width'), 10);\n if (width < 100) $node.remove();\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['span', 'svg'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n dek: null,\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const WwwTmzComExtractor = {\n domain: 'www.tmz.com',\n\n title: {\n selectors: ['.post-title-breadcrumb', 'h1', '.headline'],\n },\n\n author: 'TMZ STAFF',\n\n date_published: {\n selectors: ['.article-posted-date'],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-content', '.all-post-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.lightbox-link'],\n },\n};\n","export const WwwWashingtonpostComExtractor = {\n domain: 'www.washingtonpost.com',\n\n title: {\n selectors: ['h1', '#topper-headline-wrapper'],\n },\n\n author: {\n selectors: ['.pb-author-name'],\n },\n\n date_published: {\n selectors: [['.author-timestamp[itemprop=\"datePublished\"]', 'content']],\n },\n\n dek: {\n selectors: [],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.inline-content': $node => {\n if ($node.has('img,iframe,video').length > 0) {\n return 'figure';\n }\n\n $node.remove();\n return null;\n },\n '.pb-caption': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.interstitial-link', '.newsletter-inline-unit'],\n },\n};\n","export const WwwHuffingtonpostComExtractor = {\n domain: 'www.huffingtonpost.com',\n\n title: {\n selectors: ['h1.headline__title'],\n },\n\n author: {\n selectors: ['span.author-card__details__name'],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:modified_time\"]', 'value'],\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: ['h2.headline__subtitle'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.entry__body'],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.pull-quote',\n '.tag-cloud',\n '.embed-asset',\n '.below-entry',\n '.entry-corrections',\n '#suggested-story',\n ],\n },\n};\n","export const NewrepublicComExtractor = {\n domain: 'newrepublic.com',\n\n title: {\n selectors: ['h1.article-headline', '.minutes-primary h1.minute-title'],\n },\n\n author: {\n selectors: ['div.author-list', '.minutes-primary h3.minute-byline'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: ['h2.article-subhead'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['.article-cover', 'div.content-body'],\n ['.minute-image', '.minutes-primary div.content-body'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['aside'],\n },\n};\n","export const MoneyCnnComExtractor = {\n domain: 'money.cnn.com',\n\n title: {\n selectors: ['.article-title'],\n },\n\n author: {\n selectors: ['.byline a'],\n },\n\n date_published: {\n selectors: [['meta[name=\"date\"]', 'value']],\n\n timezone: 'GMT',\n },\n\n dek: {\n selectors: ['#storytext h2'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#storytext'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.inStoryHeading'],\n },\n};\n","export const WwwThevergeComExtractor = {\n domain: 'www.theverge.com',\n\n supportedDomains: ['www.polygon.com'],\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['h2.p-dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n // feature template multi-match\n ['.c-entry-hero .e-image', '.c-entry-intro', '.c-entry-content'],\n // regular post multi-match\n ['.e-image--hero', '.c-entry-content'],\n // feature template fallback\n '.l-wrapper .l-feature',\n // regular post fallback\n 'div.c-entry-content',\n ],\n\n // Transform lazy-loaded images\n transforms: {\n noscript: $node => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'span';\n }\n\n return null;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.aside',\n 'img.c-dynamic-image', // images come from noscript transform\n ],\n },\n};\n","export const WwwCnnComExtractor = {\n domain: 'www.cnn.com',\n\n title: {\n selectors: ['h1.pg-headline', 'h1'],\n },\n\n author: {\n selectors: ['.metadata__byline__author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"pubdate\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n // a more specific selector to grab the lead image and the body\n ['.media__video--thumbnail', '.zn-body-text'],\n // a fallback for the above\n '.zn-body-text',\n 'div[itemprop=\"articleBody\"]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n '.zn-body__paragraph, .el__leafmedia--sourced-paragraph': $node => {\n const $text = $node.html();\n if ($text) {\n return 'p';\n }\n\n return null;\n },\n\n // this transform cleans the short, all-link sections linking\n // to related content but not marked as such in any way.\n '.zn-body__paragraph': $node => {\n if ($node.has('a')) {\n if (\n $node.text().trim() ===\n $node\n .find('a')\n .text()\n .trim()\n ) {\n $node.remove();\n }\n }\n },\n\n '.media__video--thumbnail': 'figure',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwAolComExtractor = {\n domain: 'www.aol.com',\n\n title: {\n selectors: ['h1.p-article__title'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: ['.p-article__byline__date'],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwYoutubeComExtractor = {\n domain: 'www.youtube.com',\n\n title: {\n selectors: ['.watch-title', 'h1.watch-title-container'],\n },\n\n author: {\n selectors: ['.yt-user-info'],\n },\n\n date_published: {\n selectors: [['meta[itemProp=\"datePublished\"]', 'value']],\n\n timezone: 'GMT',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n defaultCleaner: false,\n\n selectors: [['#player-api', '#eow-description']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // Rebuild the video embed from the page's videoId meta tag\n '#player-api': ($node, $) => {\n const videoId = $('meta[itemProp=\"videoId\"]').attr('value');\n $node.html(`\n <iframe src=\"https://www.youtube.com/embed/${videoId}\" frameborder=\"0\" allowfullscreen></iframe>`);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwTheguardianComExtractor = {\n domain: 'www.theguardian.com',\n\n title: {\n selectors: ['.content__headline'],\n },\n\n author: {\n selectors: ['p.byline'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.content__standfirst'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.content__article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.hide-on-mobile', '.inline-icon'],\n },\n};\n","export const WwwSbnationComExtractor = {\n domain: 'www.sbnation.com',\n\n title: {\n selectors: ['h1.c-page-title'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['h2.c-entry-summary.p-dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.c-entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwBloombergComExtractor = {\n domain: 'www.bloomberg.com',\n\n title: {\n selectors: [\n // normal articles\n '.lede-headline',\n\n // /graphics/ template\n 'h1.article-title',\n\n // /news/ template\n 'h1.lede-text-only__hed',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"parsely-author\"]', 'value'],\n '.byline-details__link',\n\n // /graphics/ template\n '.bydek',\n\n // /news/ template\n '.author',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.published-at', 'datetime'],\n ['time[datetime]', 'datetime'],\n ['meta[name=\"date\"]', 'value'],\n ['meta[name=\"parsely-pub-date\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n '.article-body__content',\n\n // /graphics/ template\n ['section.copy-block'],\n\n // /news/ template\n '.body-copy',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.inline-newsletter', '.page-ad'],\n },\n};\n","export const WwwBustleComExtractor = {\n domain: 'www.bustle.com',\n\n title: {\n selectors: ['h1.post-page__title'],\n },\n\n author: {\n selectors: ['div.content-meta__author'],\n },\n\n date_published: {\n selectors: [['time.content-meta__published-date[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.post-page__body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwNprOrgExtractor = {\n domain: 'www.npr.org',\n\n title: {\n selectors: ['h1', '.storytitle'],\n },\n\n author: {\n selectors: ['p.byline__name.byline__name--block'],\n },\n\n date_published: {\n selectors: [\n ['.dateblock time[datetime]', 'datetime'],\n ['meta[name=\"date\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ['meta[name=\"twitter:image:src\"]', 'value'],\n ],\n },\n\n content: {\n selectors: ['.storytext'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.bucketwrap.image': 'figure',\n '.bucketwrap.image .credit-caption': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['div.enlarge_measure'],\n },\n};\n","export const WwwRecodeNetExtractor = {\n domain: 'www.recode.net',\n\n title: {\n selectors: ['h1.c-page-title'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['h2.c-entry-summary.p-dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['figure.e-image--hero', '.c-entry-content'],\n '.c-entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const QzComExtractor = {\n domain: 'qz.com',\n\n title: {\n selectors: ['article header h1'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['time[datetime]', 'datetime']],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ['meta[property=\"og:image\"]', 'content'],\n ['meta[name=\"twitter:image\"]', 'content'],\n ],\n },\n\n content: {\n selectors: ['#article-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwDmagazineComExtractor = {\n domain: 'www.dmagazine.com',\n\n title: {\n selectors: ['h1.story__title'],\n },\n\n author: {\n selectors: ['.story__info .story__info__item:first-child'],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n '.story__info',\n ],\n\n timezone: 'America/Chicago',\n format: 'MMMM D, YYYY h:mm a',\n },\n\n dek: {\n selectors: ['.story__subhead'],\n },\n\n lead_image_url: {\n selectors: [['article figure a:first-child', 'href']],\n },\n\n content: {\n selectors: ['.story__content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwReutersComExtractor = {\n domain: 'www.reuters.com',\n\n title: {\n selectors: ['h1.article-headline'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"og:article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#article-text'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.article-subtitle': 'h4',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['#article-byline .author'],\n },\n};\n","export const MashableComExtractor = {\n domain: 'mashable.com',\n\n title: {\n selectors: ['h1.title'],\n },\n\n author: {\n selectors: ['span.author_name a'],\n },\n\n date_published: {\n selectors: [['meta[name=\"og:article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['section.article-content.blueprint'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.image-credit': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwChicagotribuneComExtractor = {\n domain: 'www.chicagotribune.com',\n\n title: {\n selectors: ['h1.trb_ar_hl_t'],\n },\n\n author: {\n selectors: ['span.trb_ar_by_nm_au'],\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.trb_ar_page'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwVoxComExtractor = {\n domain: 'www.vox.com',\n\n title: {\n selectors: ['h1.c-page-title'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.p-dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['figure.e-image--hero', '.c-entry-content'],\n '.c-entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'figure .e-image__image noscript': $node => {\n const imgHtml = $node.html();\n $node\n .parents('.e-image__image')\n .find('.c-dynamic-image')\n .replaceWith(imgHtml);\n },\n\n 'figure .e-image__meta': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const NewsNationalgeographicComExtractor = {\n domain: 'news.nationalgeographic.com',\n\n title: {\n selectors: ['h1', 'h1.main-title'],\n },\n\n author: {\n selectors: ['.byline-component__contributors b span'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n format: 'ddd MMM DD HH:mm:ss zz YYYY',\n timezone: 'EST',\n },\n\n dek: {\n selectors: ['.article__deck'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.parsys.content', '.__image-lead__'], '.content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.parsys.content': ($node, $) => {\n const $imgSrc = $node\n .find('.image.parbase.section')\n .find('.picturefill')\n .first()\n .data('platform-src');\n if ($imgSrc) {\n $node.prepend($(`<img class=\"__image-lead__\" src=\"${$imgSrc}\" />`));\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.pull-quote.pull-quote--large'],\n },\n};\n","export const WwwNationalgeographicComExtractor = {\n domain: 'www.nationalgeographic.com',\n\n title: {\n selectors: ['h1', 'h1.main-title'],\n },\n\n author: {\n selectors: ['.byline-component__contributors b span'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.article__deck'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.parsys.content', '.__image-lead__'], '.content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.parsys.content': ($node, $) => {\n const $imageParent = $node.children().first();\n if ($imageParent.hasClass('imageGroup')) {\n const $dataAttrContainer = $imageParent\n .find('.media--medium__container')\n .children()\n .first();\n const imgPath1 = $dataAttrContainer.data('platform-image1-path');\n const imgPath2 = $dataAttrContainer.data('platform-image2-path');\n if (imgPath2 && imgPath1) {\n $node.prepend(\n $(`<div class=\"__image-lead__\">\n <img src=\"${imgPath2}\" />\n <img src=\"${imgPath1}\" />\n </div>`)\n );\n }\n } else {\n const $imgSrc = $node\n .find('.image.parbase.section')\n .find('.picturefill')\n .first()\n .data('platform-src');\n if ($imgSrc) {\n $node.prepend($(`<img class=\"__image-lead__\" src=\"${$imgSrc}\" />`));\n }\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.pull-quote.pull-quote--small'],\n },\n};\n","export const WwwLatimesComExtractor = {\n domain: 'www.latimes.com',\n\n title: {\n selectors: ['.trb_ar_hl'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.trb_ar_main'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.trb_ar_la': $node => {\n const $figure = $node.find('figure');\n $node.replaceWith($figure);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.trb_ar_by', '.trb_ar_cr'],\n },\n};\n","export const PagesixComExtractor = {\n domain: 'pagesix.com',\n\n supportedDomains: ['nypost.com'],\n\n title: {\n selectors: ['h1 a'],\n },\n\n author: {\n selectors: ['.byline'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['#featured-image-wrapper', '.entry-content'],\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '#featured-image-wrapper': 'figure',\n '.wp-caption-text': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.modal-trigger'],\n },\n};\n","export const ThefederalistpapersOrgExtractor = {\n domain: 'thefederalistpapers.org',\n\n title: {\n selectors: ['h1.entry-title'],\n },\n\n author: {\n selectors: ['main span.entry-author-name'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [['p[style]']],\n },\n};\n","export const WwwCbssportsComExtractor = {\n domain: 'www.cbssports.com',\n\n title: {\n selectors: ['.article-headline'],\n },\n\n author: {\n selectors: ['.author-name'],\n },\n\n date_published: {\n selectors: [['.date-original-reading-time time', 'datetime']],\n timezone: 'UTC',\n },\n\n dek: {\n selectors: ['.article-subline'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwMsnbcComExtractor = {\n domain: 'www.msnbc.com',\n\n title: {\n selectors: ['h1', 'h1.is-title-pane'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"DC.date.issued\"]', 'value']],\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.pane-node-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // Prepend the lead image to the body, since it sits outside the\n // content selector\n '.pane-node-body': ($node, $) => {\n const [\n selector,\n attr,\n ] = WwwMsnbcComExtractor.lead_image_url.selectors[0];\n const src = $(selector).attr(attr);\n if (src) {\n $node.prepend(`<img src=\"${src}\" />`);\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwThepoliticalinsiderComExtractor = {\n domain: 'www.thepoliticalinsider.com',\n\n title: {\n selectors: [['meta[name=\"sailthru.title\"]', 'value']],\n },\n\n author: {\n selectors: [['meta[name=\"sailthru.author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"sailthru.date\"]', 'value']],\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'], // enter selectors\n ],\n },\n\n content: {\n selectors: ['div#article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwMentalflossComExtractor = {\n domain: 'www.mentalfloss.com',\n\n title: {\n selectors: ['h1.title', '.title-group', '.inner'],\n },\n\n author: {\n selectors: ['.field-name-field-enhanced-authors'],\n },\n\n date_published: {\n selectors: ['.date-display-single'],\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.field.field-name-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const AbcnewsGoComExtractor = {\n domain: 'abcnews.go.com',\n\n title: {\n selectors: ['.article-header h1'],\n },\n\n author: {\n selectors: ['.authors'],\n clean: ['.author-overlay', '.by-text'],\n },\n\n date_published: {\n selectors: ['.timestamp'],\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-copy'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwNydailynewsComExtractor = {\n domain: 'www.nydailynews.com',\n\n title: {\n selectors: ['h1#ra-headline'],\n },\n\n author: {\n selectors: [['meta[name=\"parsely-author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"sailthru.date\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['article#ra-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['dl#ra-tags', '.ra-related', 'a.ra-editor', 'dl#ra-share-bottom'],\n },\n};\n","export const WwwCnbcComExtractor = {\n domain: 'www.cnbc.com',\n\n title: {\n selectors: ['h1.title', 'h1.ArticleHeader-headline'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n 'div#article_body.content',\n 'div.story',\n 'div.ArticleBody-articleBody',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwPopsugarComExtractor = {\n domain: 'www.popsugar.com',\n\n title: {\n selectors: ['h2.post-title', 'title-text'],\n },\n\n author: {\n selectors: [['meta[name=\"article:author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.share-copy-title', '.post-tags', '.reactions'],\n },\n};\n","export const ObserverComExtractor = {\n domain: 'observer.com',\n\n title: {\n selectors: ['h1.entry-title'],\n },\n\n author: {\n selectors: ['.author', '.vcard'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['h2.dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const PeopleComExtractor = {\n domain: 'people.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: ['a.author.url.fn'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article-body__inner'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwUsmagazineComExtractor = {\n domain: 'www.usmagazine.com',\n\n title: {\n selectors: ['header h1'],\n },\n\n author: {\n selectors: ['a.article-byline.tracked-offpage'],\n },\n\n date_published: {\n timezone: 'America/New_York',\n\n selectors: ['time.article-published-date'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article-body-inner'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.module-related'],\n },\n};\n","export const WwwRollingstoneComExtractor = {\n domain: 'www.rollingstone.com',\n\n title: {\n selectors: ['h1.content-title'],\n },\n\n author: {\n selectors: ['a.content-author.tracked-offpage'],\n },\n\n date_published: {\n selectors: ['time.content-published-date'],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: ['.content-description'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.lead-container', '.article-content'], '.article-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.module-related'],\n },\n};\n","export const twofortysevensportsComExtractor = {\n domain: '247sports.com',\n\n title: {\n selectors: ['title', 'article header h1'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n date_published: {\n selectors: [['time[data-published]', 'data-published']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['section.body.article'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const UproxxComExtractor = {\n domain: 'uproxx.com',\n\n title: {\n selectors: ['div.post-top h1'],\n },\n\n author: {\n selectors: ['.post-top .authorname'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.post-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.image': 'figure',\n 'div.image .wp-media-credit': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwEonlineComExtractor = {\n domain: 'www.eonline.com',\n\n title: {\n selectors: ['h1.article__title'],\n },\n\n author: {\n selectors: ['.entry-meta__author a'],\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['.post-content section, .post-content div.post-content__image'],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.post-content__image': 'figure',\n 'div.post-content__image .image__credits': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwMiamiheraldComExtractor = {\n domain: 'www.miamiherald.com',\n\n title: {\n selectors: ['h1.title'],\n },\n\n date_published: {\n selectors: ['p.published-date'],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.dateline-storybody'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwRefinery29ComExtractor = {\n domain: 'www.refinery29.com',\n\n title: {\n selectors: ['h1.title'],\n },\n\n author: {\n selectors: ['.contributor'],\n },\n\n date_published: {\n selectors: [['meta[name=\"sailthru.date\"]', 'value']],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['.full-width-opener', '.article-content'],\n '.article-content',\n '.body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.loading noscript': $node => {\n const imgHtml = $node.html();\n $node.parents('.loading').replaceWith(imgHtml);\n },\n\n '.section-image': 'figure',\n\n '.section-image .content-caption': 'figcaption',\n\n '.section-text': 'p',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.story-share'],\n },\n};\n","export const WwwMacrumorsComExtractor = {\n domain: 'www.macrumors.com',\n\n title: {\n selectors: ['h1', 'h1.title'],\n },\n\n author: {\n selectors: ['.author-url'],\n },\n\n date_published: {\n selectors: ['.article .byline'],\n\n // Wednesday January 18, 2017 11:44 am PST\n format: 'dddd MMMM D, YYYY h:mm A zz',\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwAndroidcentralComExtractor = {\n domain: 'www.androidcentral.com',\n\n title: {\n selectors: ['h1', 'h1.main-title'],\n },\n\n author: {\n selectors: ['.meta-by'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['.image-large', 'src']],\n },\n\n content: {\n selectors: ['.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.intro', 'blockquote'],\n },\n};\n","export const WwwSiComExtractor = {\n domain: 'www.si.com',\n\n title: {\n selectors: ['h1', 'h1.headline'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: ['.timestamp'],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: ['.quick-hit ul'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['p', '.marquee_large_2x', '.component.image']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n noscript: $node => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n ['.inline-thumb', '.primary-message', '.description', '.instructions'],\n ],\n },\n};\n","export const WwwRawstoryComExtractor = {\n domain: 'www.rawstory.com',\n\n title: {\n selectors: ['.blog-title'],\n },\n\n author: {\n selectors: ['.blog-author a:first-of-type'],\n },\n\n date_published: {\n selectors: ['.blog-author a:last-of-type'],\n\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.blog-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwCnetComExtractor = {\n domain: 'www.cnet.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: ['a.author'],\n },\n\n date_published: {\n selectors: ['time'],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: ['.article-dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['img.__image-lead__', '.article-main-body'],\n '.article-main-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'figure.image': $node => {\n const $img = $node.find('img');\n $img.attr('width', '100%');\n $img.attr('height', '100%');\n $img.addClass('__image-lead__');\n $node.remove('.imgContainer').prepend($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwCinemablendComExtractor = {\n domain: 'www.cinemablend.com',\n\n title: {\n selectors: ['.story_title'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div#wrap_left_content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwTodayComExtractor = {\n domain: 'www.today.com',\n\n title: {\n selectors: ['h1.entry-headline'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"DC.date.issued\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-container'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.label-comment'],\n },\n};\n","export const WwwHowtogeekComExtractor = {\n domain: 'www.howtogeek.com',\n\n title: {\n selectors: ['title'],\n },\n\n author: {\n selectors: ['#authorinfobox a'],\n },\n\n date_published: {\n selectors: ['#authorinfobox + div li'],\n timezone: 'GMT',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.thecontent'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwAlComExtractor = {\n domain: 'www.al.com',\n\n title: {\n selectors: [['meta[name=\"title\"]', 'value']],\n },\n\n author: {\n selectors: [['meta[name=\"article_author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article_date_original\"]', 'value']],\n timezone: 'EST',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwThepennyhoarderComExtractor = {\n domain: 'www.thepennyhoarder.com',\n\n title: {\n selectors: [['meta[name=\"dcterms.title\"]', 'value']],\n },\n\n author: {\n selectors: [['link[rel=\"author\"]', 'title']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.post-img', '.post-text'], '.post-text'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwWesternjournalismComExtractor = {\n domain: 'www.westernjournalism.com',\n\n title: {\n selectors: ['title', 'h1.entry-title'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"DC.date.issued\"]', 'value']],\n },\n\n dek: {\n selectors: ['.subtitle'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article-sharing.top + div'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.ad-notice-small'],\n },\n};\n","export const FusionNetExtractor = {\n domain: 'fusion.net',\n\n title: {\n selectors: ['.post-title', '.single-title', '.headline'],\n },\n\n author: {\n selectors: ['.show-for-medium .byline'],\n },\n\n date_published: {\n selectors: [['time.local-time', 'datetime']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['.post-featured-media', '.article-content'],\n '.article-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n '.fusion-youtube-oembed': 'figure',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwAmericanowComExtractor = {\n domain: 'www.americanow.com',\n\n title: {\n selectors: ['.title', ['meta[name=\"title\"]', 'value']],\n },\n\n author: {\n selectors: ['.byline'],\n },\n\n date_published: {\n selectors: [['meta[name=\"publish_date\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.article-content', '.image', '.body'], '.body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.article-video-wrapper', '.show-for-small-only'],\n },\n};\n","export const ScienceflyComExtractor = {\n domain: 'sciencefly.com',\n\n title: {\n selectors: ['.entry-title', '.cb-entry-title', '.cb-single-title'],\n },\n\n author: {\n selectors: ['div.cb-author', 'div.cb-author-title'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['div.theiaPostSlider_slides img', 'src']],\n },\n\n content: {\n selectors: ['div.theiaPostSlider_slides'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const HellogigglesComExtractor = {\n domain: 'hellogiggles.com',\n\n title: {\n selectors: ['.title'],\n },\n\n author: {\n selectors: ['.author-link'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const ThoughtcatalogComExtractor = {\n domain: 'thoughtcatalog.com',\n\n title: {\n selectors: ['h1.title', ['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: [\n 'div.col-xs-12.article_header div.writer-container.writer-container-inline.writer-no-avatar h4.writer-name',\n 'h1.writer-name',\n ],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry.post'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.tc_mark'],\n },\n};\n","export const WwwNjComExtractor = {\n domain: 'www.nj.com',\n\n title: {\n selectors: [['meta[name=\"title\"]', 'value']],\n },\n\n author: {\n selectors: [['meta[name=\"article_author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article_date_original\"]', 'value']],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwInquisitrComExtractor = {\n domain: 'www.inquisitr.com',\n\n title: {\n selectors: ['h1.entry-title.story--header--title'],\n },\n\n author: {\n selectors: ['div.story--header--author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"datePublished\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['article.story', '.entry-content.'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.post-category',\n '.story--header--socials',\n '.story--header--content',\n ],\n },\n};\n","export const WwwNbcnewsComExtractor = {\n domain: 'www.nbcnews.com',\n\n title: {\n selectors: ['div.article-hed h1'],\n },\n\n author: {\n selectors: ['span.byline_author'],\n },\n\n date_published: {\n selectors: [\n ['.flag_article-wrapper time.timestamp_article[datetime]', 'datetime'],\n '.flag_article-wrapper time',\n ],\n\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const FortuneComExtractor = {\n domain: 'fortune.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: ['.MblGHNMJ'],\n\n timezone: 'UTC',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['picture', 'article.row'], 'article.row'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwLinkedinComExtractor = {\n domain: 'www.linkedin.com',\n\n title: {\n selectors: ['.article-title', 'h1'],\n },\n\n author: {\n selectors: [\n ['meta[name=\"article:author\"]', 'value'],\n '.entity-name a[rel=author]',\n ],\n },\n\n date_published: {\n selectors: [['time[itemprop=\"datePublished\"]', 'datetime']],\n\n timezone: 'America/Los_Angeles',\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['header figure', '.prose'], '.prose'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.entity-image'],\n },\n};\n","export const ObamawhitehouseArchivesGovExtractor = {\n domain: 'obamawhitehouse.archives.gov',\n\n supportedDomains: ['whitehouse.gov'],\n\n title: {\n selectors: ['h1', '.pane-node-title'],\n },\n\n author: {\n selectors: ['.blog-author-link', '.node-person-name-link'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.field-name-field-forall-summary'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n defaultCleaner: false,\n\n selectors: ['div#content-start', '.pane-node-field-forall-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.pane-node-title', '.pane-custom.pane-1'],\n },\n};\n","export const WwwOpposingviewsComExtractor = {\n domain: 'www.opposingviews.com',\n\n title: {\n selectors: ['h1.title'],\n },\n\n author: {\n selectors: ['div.date span span a'],\n },\n\n date_published: {\n selectors: [['meta[name=\"publish_date\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.show-for-small-only'],\n },\n};\n","export const WwwProspectmagazineCoUkExtractor = {\n domain: 'www.prospectmagazine.co.uk',\n\n title: {\n selectors: ['.page-title'],\n },\n\n author: {\n selectors: ['.aside_author .title'],\n },\n\n date_published: {\n selectors: ['.post-info'],\n\n timezone: 'Europe/London',\n },\n\n dek: {\n selectors: ['.page-subtitle'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['article .post_content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const ForwardComExtractor = {\n domain: 'forward.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: ['.author-name', ['meta[name=\"sailthru.author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"date\"]', 'value']],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.post-item-media-wrap', '.post-item p']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.donate-box', '.message', '.subtitle'],\n },\n};\n","export const WwwQdailyComExtractor = {\n domain: 'www.qdaily.com',\n\n title: {\n selectors: ['h2', 'h2.title'],\n },\n\n author: {\n selectors: ['.name'],\n },\n\n date_published: {\n selectors: [['.date.smart-date', 'data-origindate']],\n },\n\n dek: {\n selectors: ['.excerpt'],\n },\n\n lead_image_url: {\n selectors: [['.article-detail-hd img', 'src']],\n },\n\n content: {\n selectors: ['.detail'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.lazyload', '.lazylad', '.lazylood'],\n },\n};\n","export const GothamistComExtractor = {\n domain: 'gothamist.com',\n\n supportedDomains: [\n 'chicagoist.com',\n 'laist.com',\n 'sfist.com',\n 'shanghaiist.com',\n 'dcist.com',\n ],\n\n title: {\n selectors: ['h1', '.entry-header h1'],\n },\n\n author: {\n selectors: ['.author'],\n },\n\n date_published: {\n selectors: ['abbr', 'abbr.published'],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: [null],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.entry-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'div.image-none': 'figure',\n '.image-none i': 'figcaption',\n 'div.image-left': 'figure',\n '.image-left i': 'figcaption',\n 'div.image-right': 'figure',\n '.image-right i': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.image-none br',\n '.image-left br',\n '.image-right br',\n '.galleryEase',\n ],\n },\n};\n","export const WwwFoolComExtractor = {\n domain: 'www.fool.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['.author-inline .author-name'],\n },\n\n date_published: {\n selectors: [['meta[name=\"date\"]', 'value']],\n },\n\n dek: {\n selectors: ['header h2'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
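// Transforms like the Gothamist map above come in two forms: a string
// value renames the matched tag (div.image-none -> figure), while a
// function receives the cheerio node and rewrites it in place. A hedged
// sketch of the dispatch, with `convertNodeTo` standing in for a
// tag-renaming helper:
function applyTransforms($, transforms) {
  Reflect.ownKeys(transforms).forEach(selector => {
    const transform = transforms[selector];
    $(selector).each((_, node) => {
      const $node = $(node);
      if (typeof transform === 'string') {
        convertNodeTo($node, $, transform); // plain rename
      } else if (typeof transform === 'function') {
        const result = transform($node, $);
        // A returned string is treated as a new tag name.
        if (typeof result === 'string') convertNodeTo($node, $, result);
      }
    });
  });
}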
E.g., unusual lazy loaded images\n transforms: {\n '.caption img': $node => {\n const src = $node.attr('src');\n $node.parent().replaceWith(`<figure><img src=\"${src}\" /></figure>`);\n },\n '.caption': 'figcaption',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['#pitch'],\n },\n};\n","export const WwwSlateComExtractor = {\n domain: 'www.slate.com',\n\n title: {\n selectors: ['.hed', 'h1'],\n },\n\n author: {\n selectors: ['a[rel=author]'],\n },\n\n date_published: {\n selectors: ['.pub-date'],\n\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: ['.dek'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.about-the-author',\n '.pullquote',\n '.newsletter-signup-component',\n '.top-comment',\n ],\n },\n};\n","export const IciRadioCanadaCaExtractor = {\n domain: 'ici.radio-canada.ca',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: [['meta[name=\"dc.creator\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"dc.date.created\"]', 'value']],\n format: 'YYYY-MM-DD|HH[h]mm',\n timezone: 'America/New_York',\n },\n\n dek: {\n selectors: ['.bunker-component.lead'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['.main-multimedia-item', '.news-story-content']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwFortinetComExtractor = {\n domain: 'www.fortinet.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['.b15-blog-meta__author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n 'div.responsivegrid.aem-GridColumn.aem-GridColumn--default--12',\n ],\n\n transforms: {\n noscript: $node => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n return null;\n },\n },\n },\n};\n","export const WwwFastcompanyComExtractor = {\n domain: 'www.fastcompany.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['.post__by'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.post__deck'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.post__article'],\n },\n};\n","export const BlisterreviewComExtractor = {\n domain: 'blisterreview.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value'], 'h1.entry-title'],\n },\n\n author: {\n selectors: ['span.author-name'],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ['time.entry-date', 'datetime'],\n ['meta[itemprop=\"datePublished\"]', 'content'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ['meta[property=\"og:image\"]', 'content'],\n ['meta[itemprop=\"image\"]', 'content'],\n ['meta[name=\"twitter:image\"]', 'content'],\n ['img.attachment-large', 'src'],\n ],\n },\n\n content: {\n selectors: [\n [\n '.elementor-section-wrap',\n '.elementor-text-editor > p, .elementor-text-editor > ul > li, .attachment-large, .wp-caption-text',\n ],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n figcaption: 'p',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.comments-area'],\n },\n};\n","export const NewsMynaviJpExtractor = {\n domain: 'news.mynavi.jp',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: ['main div.article-author a.article-author__name'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['main article div'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
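// The Fortinet noscript transform above shows the conditional form of a
// transform: return a tag name to convert the node, or null to leave it
// untouched. The same shape works for any wrapper; a standalone sketch
// that promotes an element whose only child is an image:
const unwrapLoneImage = $node => {
  const $children = $node.children();
  if ($children.length === 1 && $children.get(0).tagName === 'img') {
    return 'figure'; // parser converts the matched node to <figure>
  }
  return null; // anything else is left as-is
};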
E.g., unusual lazy loaded images\n transforms: {\n img: $node => {\n const src = $node.attr('data-original');\n if (src !== '') {\n $node.attr('src', src);\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const ClinicaltrialsGovExtractor = {\n domain: 'clinicaltrials.gov',\n\n title: {\n selectors: ['h1.tr-solo_record'],\n },\n\n author: {\n selectors: ['div#sponsor.tr-info-text'],\n },\n\n date_published: {\n // selectors: ['span.term[data-term=\"Last Update Posted\"]'],\n selectors: ['div:has(> span.term[data-term=\"Last Update Posted\"])'],\n },\n\n content: {\n selectors: ['div#tab-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.usa-alert> img'],\n },\n};\n","export const GithubComExtractor = {\n domain: 'github.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: [\n // enter author selectors\n ],\n },\n\n date_published: {\n selectors: [['span[itemprop=\"dateModified\"] relative-time', 'datetime']],\n },\n\n dek: {\n selectors: ['span[itemprop=\"about\"]'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['#readme article']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwRedditComExtractor = {\n domain: 'www.reddit.com',\n\n title: {\n selectors: ['div[data-test-id=\"post-content\"] h2'],\n },\n\n author: {\n selectors: ['div[data-test-id=\"post-content\"] a[href*=\"user/\"]'],\n },\n\n date_published: {\n selectors: [\n 'div[data-test-id=\"post-content\"] a[data-click-id=\"timestamp\"]',\n ],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [\n ['div[data-test-id=\"post-content\"] p'], // text post\n [\n 'div[data-test-id=\"post-content\"] a[target=\"_blank\"]:not([data-click-id=\"timestamp\"])', // external link\n 'div[data-test-id=\"post-content\"] div[data-click-id=\"media\"]', // embedded media\n ], // external link with media preview (YouTube, imgur album, etc...)\n ['div[data-test-id=\"post-content\"] div[data-click-id=\"media\"]'], // Embedded media (Reddit video)\n [\n 'div[data-test-id=\"post-content\"] a[target=\"_blank\"]:not([data-click-id=\"timestamp\"])',\n ], // external link\n 'div[data-test-id=\"post-content\"]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[role=\"img\"]': $node => {\n // External link image preview\n const $img = $node.find('img');\n const bgImg = $node.css('background-image');\n if ($img.length === 1 && bgImg) {\n $img.attr('src', bgImg.match(/\\((.*?)\\)/)[1].replace(/('|\")/g, ''));\n return $img;\n }\n return $node;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.icon'],\n },\n};\n","export const OtrsComExtractor = {\n domain: 'otrs.com',\n\n title: {\n selectors: ['#main article h1'],\n },\n\n author: {\n selectors: ['div.dateplusauthor a'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#main article'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [\n 'div.dateplusauthor',\n 'div.gr-12.push-6.footershare',\n '#atftbx',\n 'div.category-modul',\n ],\n },\n};\n","export const WwwOssnewsJpExtractor = {\n domain: 'www.ossnews.jp',\n\n title: {\n selectors: ['#alpha-block h1.hxnewstitle'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['p.fs12'],\n format: 'YYYY年MM月DD日 HH:mm',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#alpha-block .section:has(h1.hxnewstitle)'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const BuzzapJpExtractor = {\n domain: 'buzzap.jp',\n\n title: {\n selectors: ['h1.entry-title'],\n },\n\n author: null,\n\n date_published: {\n selectors: [['time.entry-date', 'datetime']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.ctiframe'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwAsahiComExtractor = {\n domain: 'www.asahi.com',\n\n title: {\n selectors: ['.ArticleTitle h1'],\n },\n\n author: {\n selectors: [['meta[name=\"article:author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"pubdate\"]', 'value']],\n },\n\n dek: null,\n\n excerpt: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#MainInner div.ArticleBody'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['div.AdMod', 'div.LoginSelectArea'],\n },\n};\n","export const WwwSanwaCoJpExtractor = {\n domain: 'www.sanwa.co.jp',\n\n title: {\n selectors: ['#newsContent h1'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['p.date'],\n format: 'YYYY.MM.DD',\n timezone: 'Asia/Tokyo',\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#newsContent'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['#smartphone', 'div.sns_box', 'div.contentFoot'],\n },\n};\n","export const WwwElecomCoJpExtractor = {\n domain: 'www.elecom.co.jp',\n\n title: {\n selectors: ['title'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['p.section-last'],\n format: 'YYYY.MM.DD',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: null,\n\n 
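// The Reddit transform above digs a URL out of an inline
// background-image style. The regex work is easy to misread, so here it
// is in isolation on a sample value:
const bgImg = 'url("https://example.com/preview.jpg")';
// /\((.*?)\)/ captures the text inside the parentheses; the second
// replace strips any wrapping quotes.
const src = bgImg.match(/\((.*?)\)/)[1].replace(/('|")/g, '');
// src === 'https://example.com/preview.jpg'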
content: {\n selectors: ['td.TableMain2'],\n\n defaultCleaner: false,\n\n transforms: {\n table: $node => {\n $node.attr('width', 'auto');\n },\n },\n\n clean: [],\n },\n};\n","export const ScanNetsecurityNeJpExtractor = {\n domain: 'scan.netsecurity.ne.jp',\n\n title: {\n selectors: ['header.arti-header h1.head'],\n },\n\n author: null,\n\n date_published: {\n selectors: [['meta[name=\"article:modified_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['header.arti-header p.arti-summary'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.arti-content.arti-content--thumbnail'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['aside.arti-giga'],\n },\n};\n","export const JvndbJvnJpExtractor = {\n domain: 'jvndb.jvn.jp',\n\n title: {\n selectors: ['title'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['div.modifytxt:nth-child(2)'],\n format: 'YYYY/MM/DD',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: null,\n\n content: {\n selectors: ['#news-list'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const GeniusComExtractor = {\n domain: 'genius.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['h2 a'],\n },\n\n date_published: {\n selectors: [\n [\n 'meta[itemprop=page_data]',\n 'value',\n res => {\n const json = JSON.parse(res);\n return json.song.release_date;\n },\n ],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n [\n 'meta[itemprop=page_data]',\n 'value',\n res => {\n const json = JSON.parse(res);\n return json.song.album.cover_art_url;\n },\n ],\n ],\n },\n\n content: {\n selectors: ['.lyrics'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
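// The Genius selectors above use a three-element tuple:
// [selector, attribute, transform]. The attribute value -- here a JSON
// blob stored in a meta tag -- is run through the transform before use.
// The last hop in isolation, with a made-up payload:
const res = '{"song":{"release_date":"2019-06-01"}}'; // hypothetical value
const toReleaseDate = raw => {
  const json = JSON.parse(raw);
  return json.song.release_date;
};
toReleaseDate(res); // '2019-06-01'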
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwJnsaOrgExtractor = {\n domain: 'www.jnsa.org',\n\n title: {\n selectors: ['#wgtitle h2'],\n },\n\n author: null,\n\n date_published: null,\n\n dek: null,\n\n excerpt: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#main_area'],\n\n transforms: {},\n\n clean: ['#pankuzu', '#side'],\n },\n};\n","export const PhpspotOrgExtractor = {\n domain: 'phpspot.org',\n\n title: {\n selectors: ['h3.hl'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['h4.hl'],\n format: 'YYYY年MM月DD日',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: null,\n\n content: {\n selectors: ['div.entrybody'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwInfoqComExtractor = {\n domain: 'www.infoq.com',\n\n title: {\n selectors: ['h1.heading'],\n },\n\n author: {\n selectors: ['div.widget.article__authors'],\n },\n\n date_published: {\n selectors: ['.article__readTime.date'],\n format: 'YYYY年MM月DD日',\n timezone: 'Asia/Tokyo',\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article__data'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwMoongiftJpExtractor = {\n domain: 'www.moongift.jp',\n\n title: {\n selectors: ['h1.title a'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['ul.meta li:not(.social):first-of-type'],\n timezone: 'Asia/Tokyo',\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#main'],\n\n transforms: {},\n\n clean: ['ul.mg_service.cf'],\n },\n};\n","export const WwwItmediaCoJpExtractor = {\n domain: 'www.itmedia.co.jp',\n\n supportedDomains: [\n 'www.atmarkit.co.jp',\n 'techtarget.itmedia.co.jp',\n 'nlab.itmedia.co.jp',\n ],\n\n title: {\n selectors: ['#cmsTitle h1'],\n },\n\n author: {\n selectors: ['#byline'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:modified_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['#cmsAbstract h2'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#cmsBody'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['#snsSharebox'],\n },\n};\n","export const WwwPublickey1JpExtractor = {\n domain: 'www.publickey1.jp',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['#subcol p:has(img)'],\n },\n\n date_published: {\n selectors: ['div.pubdate'],\n format: 'YYYY年MM月DD日',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#maincol'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['#breadcrumbs', 'div.sbm', 'div.ad_footer'],\n },\n};\n","export const TakagihiromitsuJpExtractor = {\n domain: 'takagi-hiromitsu.jp',\n\n title: {\n selectors: ['h3'],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[http-equiv=\"Last-Modified\"]', 
'value']],\n },\n\n dek: null,\n\n lead_image_url: null,\n\n content: {\n selectors: ['div.body'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const BookwalkerJpExtractor = {\n domain: 'bookwalker.jp',\n\n title: {\n selectors: ['h1.main-heading'],\n },\n\n author: {\n selectors: ['div.authors'],\n },\n\n date_published: {\n selectors: [\n '.work-info .work-detail:first-of-type .work-detail-contents:last-of-type',\n ],\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: [['div.main-info', 'div.main-cover-inner']],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [\n 'span.label.label--trial',\n 'dt.info-head.info-head--coin',\n 'dd.info-contents.info-contents--coin',\n 'div.info-notice.fn-toggleClass',\n ],\n },\n};\n","export const WwwYomiuriCoJpExtractor = {\n domain: 'www.yomiuri.co.jp',\n\n title: {\n selectors: ['h1.title-article.c-article-title'],\n },\n\n author: null,\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.p-main-contents'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const JapanCnetComExtractor = {\n domain: 'japan.cnet.com',\n\n title: {\n selectors: ['.leaf-headline-ttl'],\n },\n\n author: {\n selectors: ['.writer'],\n },\n\n date_published: {\n selectors: ['.date'],\n format: 'YYYY年MM月DD日 HH時mm分',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article_body'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const DeadlineComExtractor = {\n domain: 'deadline.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['section.author h3'],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.a-article-grid__main.pmc-a-grid article.pmc-a-grid-item'],\n\n transforms: {\n '.embed-twitter': $node => {\n const innerHtml = $node.html();\n $node.replaceWith(innerHtml);\n },\n },\n\n clean: [],\n },\n};\n","export const WwwGizmodoJpExtractor = {\n domain: 'www.gizmodo.jp',\n\n title: {\n selectors: ['h1.p-post-title'],\n },\n\n author: {\n selectors: ['li.p-post-AssistAuthor'],\n },\n\n date_published: {\n selectors: [['li.p-post-AssistTime time', 'datetime']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['article.p-post'],\n\n transforms: {\n 'img.p-post-thumbnailImage': $node => {\n const src = $node.attr('src');\n $node.attr('src', src.replace(/^.*=%27/, '').replace(/%27;$/, ''));\n },\n },\n\n clean: ['h1.p-post-title', 'ul.p-post-Assist'],\n },\n};\n","export const GetnewsJpExtractor = {\n domain: 'getnews.jp',\n\n title: {\n selectors: ['article h1'],\n },\n\n author: {\n selectors: ['span.prof'],\n },\n\n date_published: {\n selectors: [['ul.cattag-top time', 'datetime']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.post-bodycopy'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwLifehackerJpExtractor = {\n domain: 'www.lifehacker.jp',\n\n title: {\n 
selectors: ['h1.lh-summary-title'],\n },\n\n author: {\n selectors: ['p.lh-entryDetailInner--credit'],\n },\n\n date_published: {\n selectors: [['div.lh-entryDetail-header time', 'datetime']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.lh-entryDetail-body'],\n\n transforms: {\n 'img.lazyload': $node => {\n const src = $node.attr('src');\n $node.attr('src', src.replace(/^.*=%27/, '').replace(/%27;$/, ''));\n },\n },\n\n clean: ['p.lh-entryDetailInner--credit'],\n },\n};\n","export const SectIijAdJpExtractor = {\n domain: 'sect.iij.ad.jp',\n\n title: {\n selectors: ['h3'],\n },\n\n author: {\n selectors: ['dl.entrydate dd'],\n },\n\n date_published: {\n selectors: ['dl.entrydate dd'],\n format: 'YYYY年MM月DD日',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#article'],\n\n transforms: {},\n\n clean: ['dl.entrydate'],\n },\n};\n","export const WwwOreillyCoJpExtractor = {\n domain: 'www.oreilly.co.jp',\n\n title: {\n selectors: ['h3'],\n },\n\n author: {\n selectors: ['li[itemprop=\"author\"]'],\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']],\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#content'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['.social-tools'],\n },\n};\n","export const WwwIpaGoJpExtractor = {\n domain: 'www.ipa.go.jp',\n\n title: {\n selectors: ['h1'],\n },\n\n author: null,\n\n date_published: {\n selectors: ['p.ipar_text_right'],\n format: 'YYYY年M月D日',\n timezone: 'Asia/Tokyo',\n },\n\n dek: null,\n\n lead_image_url: null,\n\n content: {\n selectors: ['#ipar_main'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: ['p.ipar_text_right'],\n },\n};\n","export const WeeklyAsciiJpExtractor = {\n domain: 'weekly.ascii.jp',\n\n title: {\n selectors: ['h1[itemprop=\"headline\"]'],\n },\n\n author: {\n selectors: ['p.author'],\n },\n\n date_published: {\n selectors: [['meta[name=\"odate\"]', 'value']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const TechlogIijAdJpExtractor = {\n domain: 'techlog.iij.ad.jp',\n\n title: {\n selectors: ['h1.entry-title'],\n },\n\n author: {\n selectors: ['a[rel=\"author\"]'],\n },\n\n date_published: {\n selectors: [['time.entry-date', 'datetime']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.entry-content'],\n\n defaultCleaner: false,\n\n transforms: {},\n\n clean: [],\n },\n};\n","import URL from 'url';\n\nexport const WiredJpExtractor = {\n domain: 'wired.jp',\n\n title: {\n selectors: ['h1.post-title'],\n },\n\n author: {\n selectors: ['p[itemprop=\"author\"]'],\n },\n\n date_published: {\n selectors: [['time', 'datetime']],\n },\n\n dek: {\n selectors: ['.post-intro'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['article.article-detail'],\n\n transforms: {\n 'img[data-original]': $node => {\n const dataOriginal = $node.attr('data-original');\n const src = $node.attr('src');\n const url = URL.resolve(src, dataOriginal);\n $node.attr('src', url);\n },\n },\n\n clean: 
['.post-category', 'time', 'h1.post-title', '.social-area-syncer'],\n },\n};\n","export const JapanZdnetComExtractor = {\n domain: 'japan.zdnet.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: [['meta[name=\"cXenseParse:author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.article_body'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwRbbtodayComExtractor = {\n domain: 'www.rbbtoday.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['.writer.writer-name'],\n },\n\n date_published: {\n selectors: [['header time', 'datetime']],\n },\n\n dek: {\n selectors: ['.arti-summary'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.arti-content'],\n\n transforms: {},\n\n clean: ['.arti-giga'],\n },\n};\n","export const WwwLemondeFrExtractor = {\n domain: 'www.lemonde.fr',\n\n title: {\n selectors: ['h1.article__title'],\n },\n\n author: {\n selectors: ['.author__name'],\n },\n\n date_published: {\n selectors: [['meta[name=\"og:article:published_time\"]', 'value']],\n },\n\n dek: {\n selectors: ['.article__desc'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['.article__content'],\n\n transforms: {},\n\n clean: [],\n },\n};\n","export const WwwPhoronixComExtractor = {\n domain: 'www.phoronix.com',\n\n title: {\n selectors: ['article header'],\n },\n\n author: {\n selectors: ['.author a:first-child'],\n },\n\n date_published: {\n selectors: ['.author'],\n // 1 June 2019 at 08:34 PM EDT\n format: 'D MMMM YYYY at hh:mm',\n timezone: 'America/New_York',\n },\n\n dek: null,\n\n lead_image_url: null,\n\n content: {\n selectors: ['.content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const PitchforkComExtractor = {\n domain: 'pitchfork.com',\n\n title: {\n selectors: ['title'],\n },\n\n author: {\n selectors: ['.authors-detail__display-name'],\n },\n\n date_published: {\n selectors: [['.pub-date', 'datetime']],\n },\n\n dek: {\n selectors: ['.review-detail__abstract'],\n },\n\n lead_image_url: {\n selectors: [['.single-album-tombstone__art img', 'src']],\n },\n\n content: {\n selectors: ['.review-detail__text'],\n },\n\n extend: {\n score: {\n selectors: ['.score'],\n },\n },\n};\n","export const BiorxivOrgExtractor = {\n domain: 'biorxiv.org',\n\n title: {\n selectors: ['h1#page-title'],\n },\n\n author: {\n selectors: [\n 'div.highwire-citation-biorxiv-article-top > div.highwire-cite-authors',\n ],\n },\n\n content: {\n selectors: ['div#abstract-1'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
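// Pitchfork's `extend` block above adds a custom field (score) beyond the
// standard title/author/content set. A hedged sketch of how extend
// results can be merged into the output, reusing the hypothetical
// selectFrom helper sketched earlier:
function applyExtend($, extend = {}) {
  return Reflect.ownKeys(extend).reduce((acc, field) => {
    const { selectors = [] } = extend[field];
    // Like the main fields, the first selector that yields a value wins.
    const value = selectors.map(s => selectFrom($, s)).find(Boolean) || null;
    return { ...acc, [field]: value };
  }, {});
}
// e.g. applyExtend($, { score: { selectors: ['.score'] } })
//   -> { score: '8.3' } on a review page (illustrative value)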
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const EpaperZeitDeExtractor = {\n domain: 'epaper.zeit.de',\n\n title: {\n selectors: ['p.title'],\n },\n\n author: {\n selectors: ['.article__author'],\n },\n\n date_published: null,\n\n excerpt: {\n selectors: ['subtitle'],\n },\n\n lead_image_url: null,\n\n content: {\n selectors: ['.article'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'p.title': 'h1',\n '.article__author': 'p',\n byline: 'p',\n linkbox: 'p',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['image-credits', 'box[type=citation]'],\n },\n};\n","export const WwwLadbibleComExtractor = {\n domain: 'www.ladbible.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['[class*=Byline]'],\n },\n\n date_published: {\n selectors: ['time'],\n timezone: 'Europe/London',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['[class*=ArticleContainer]'],\n clean: [\n 'time',\n 'source',\n 'a[href^=\"https://www.ladbible.com/\"]',\n 'picture',\n '[class*=StyledCardBlock]',\n ],\n },\n};\n","export const TimesofindiaIndiatimesComExtractor = {\n domain: 'timesofindia.indiatimes.com',\n\n title: {\n selectors: ['h1'],\n },\n\n extend: {\n reporter: {\n selectors: ['div.byline'],\n transforms: {},\n },\n },\n\n date_published: {\n selectors: ['.byline'],\n format: 'MMM D, YYYY, HH:mm z',\n timezone: 'Asia/Kolkata',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div.contentwrapper:has(section)'],\n defaultCleaner: false,\n\n clean: ['section', 'h1', '.byline', '.img_cptn'],\n },\n};\n","export const MaTtiasBeExtractor = {\n domain: 'ma.ttias.be',\n\n title: {\n selectors: [['meta[name=\"twitter:title\"]', 'value']],\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']],\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']],\n },\n\n content: {\n selectors: [['.content']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n h2: $node => {\n // The \"id\" attribute values would result in low scores and the element being\n // removed.\n $node.attr('id', null);\n\n // h1 elements will be demoted to h2, so demote h2 elements to h3.\n return 'h3';\n },\n h1: $node => {\n // The \"id\" attribute values would result in low scores and the element being\n // removed.\n $node.attr('id', null);\n\n // A subsequent h2 will be removed if there is not a paragraph before it, so\n // add a paragraph here. It will be removed anyway because it is empty.\n $node.after('');\n },\n ul: $node => {\n // Articles contain lists of links which look like, but are not, navigation\n // elements. 
Adding this class attribute avoids them being incorrectly removed.\n $node.attr('class', 'entry-content-asset');\n },\n },\n },\n};\n","export const PastebinComExtractor = {\n domain: 'pastebin.com',\n\n title: {\n selectors: ['h1'],\n },\n\n author: {\n selectors: ['.paste_box_line2 .t_us + a'],\n },\n\n date_published: {\n selectors: ['.paste_box_line2 .t_da + span'],\n timezone: 'America/New_York',\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['#selectable .text'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n ol: 'div',\n li: 'p',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","/* eslint-disable no-nested-ternary */\n/* eslint-disable no-unused-expressions */\nexport const WwwAbendblattDeExtractor = {\n domain: 'www.abendblatt.de',\n\n title: {\n selectors: ['h2.article__header__headline'],\n },\n\n author: {\n selectors: ['span.author-info__name-text'],\n },\n\n date_published: {\n selectors: [['time.article__header__date', 'datetime']],\n },\n\n dek: {\n selectors: [\"span[itemprop='description']\"],\n },\n\n lead_image_url: {\n selectors: [[\"meta[name='og:image']\", 'value']],\n },\n\n content: {\n selectors: ['div.article__body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n p: $node => {\n if (!$node.hasClass('obfuscated')) return null;\n let o = '';\n let n = 0;\n for (let i = $node.text(); n < i.length; n += 1) {\n const r = i.charCodeAt(n);\n r === 177\n ? (o += '%')\n : r === 178\n ? (o += '!')\n : r === 180\n ? (o += ';')\n : r === 181\n ? (o += '=')\n : r === 32\n ? (o += ' ')\n : r === 10\n ? (o += '\\n')\n : r > 33 && (o += String.fromCharCode(r - 1));\n }\n\n $node.html(o);\n $node.removeClass('obfuscated');\n $node.addClass('deobfuscated');\n return null;\n },\n div: $node => {\n if (!$node.hasClass('obfuscated')) return null;\n let o = '';\n let n = 0;\n for (let i = $node.text(); n < i.length; n += 1) {\n const r = i.charCodeAt(n);\n r === 177\n ? (o += '%')\n : r === 178\n ? (o += '!')\n : r === 180\n ? (o += ';')\n : r === 181\n ? (o += '=')\n : r === 32\n ? (o += ' ')\n : r === 10\n ? (o += '\\n')\n : r > 33 && (o += String.fromCharCode(r - 1));\n }\n\n $node.html(o);\n $node.removeClass('obfuscated');\n $node.addClass('deobfuscated');\n return null;\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const WwwGrueneDeExtractor = {\n domain: 'www.gruene.de',\n\n title: {\n selectors: ['header h1'],\n },\n\n author: null,\n\n date_published: null,\n\n dek: null,\n\n lead_image_url: {\n selectors: [['meta[property=\"og:image\"]', 'content']],\n },\n\n content: {\n // selectors: ['section'],\n selectors: [['section header', 'section h2', 'section p', 'section ol']],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
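// The two Abendblatt transforms above run an identical character-shift
// decoder over obfuscated <p> and <div> nodes: a few codepoints map to
// punctuation, everything else above 33 shifts down by one. A shared
// helper with the same behavior reads more clearly (a sketch; the inline
// loops above are the shipped version):
const DEOBFUSCATION_MAP = { 177: '%', 178: '!', 180: ';', 181: '=' };

function deobfuscate(text) {
  let out = '';
  for (let n = 0; n < text.length; n += 1) {
    const code = text.charCodeAt(n);
    if (DEOBFUSCATION_MAP[code]) out += DEOBFUSCATION_MAP[code];
    else if (code === 32) out += ' ';
    else if (code === 10) out += '\n';
    else if (code > 33) out += String.fromCharCode(code - 1);
  }
  return out;
}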
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['figcaption', 'p[class]'],\n },\n};\n","export const WwwEngadgetComExtractor = {\n domain: 'www.engadget.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value']],\n },\n\n author: {\n selectors: ['a.th-meta[data-ylk*=\"subsec:author\"]'],\n },\n\n // Engadget stories have publish dates, but the only representation of them on the page\n // is in a format like \"2h ago\". There are also these tags with blank values:\n // \n date_published: {\n selectors: [\n // enter selectors\n ],\n },\n\n dek: {\n selectors: ['div[class*=\"o-title_mark\"] div'],\n },\n\n // Engadget stories do have lead images specified by an og:image meta tag, but selecting\n // the value attribute of that tag fails. I believe the \"&image;\" sequence of characters\n // is triggering this inability to select the attribute value.\n lead_image_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n content: {\n selectors: [\n [\n // Some figures will be inside div.article-text, but some header figures/images\n // will not.\n '#page_body figure:not(div.article-text figure)',\n 'div.article-text',\n ],\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [],\n },\n};\n","export const ArstechnicaComExtractor = {\n domain: 'arstechnica.com',\n\n // Articles from this site are often paginated, but I was unable to write a CSS\n // selector to find the next page. On the last page, there will be a link with a CSS\n // selector indicating that the previous page is next. But the parser appears to find\n // the next page without this extractor finding it, as long as the fallback option is\n // left at its default value of true.\n\n title: {\n selectors: ['title'],\n },\n\n author: {\n selectors: ['*[rel=\"author\"] *[itemprop=\"name\"]'],\n },\n\n date_published: {\n selectors: [['.byline time', 'datetime']],\n },\n\n dek: {\n selectors: ['h2[itemprop=\"description\"]'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div[itemprop=\"articleBody\"]'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n h2: $node => {\n // Some pages have an element h2 that is significant, and that the parser will\n // remove if not following a paragraph. 
Adding this empty paragraph fixes it, and\n // the empty paragraph will be removed anyway.\n $node.before('');\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result.\n clean: [\n // Remove enlarge links and separators inside image captions.\n 'figcaption .enlarge-link',\n 'figcaption .sep',\n\n // I could not transform the video into usable elements, so I\n // removed them.\n 'figure.video',\n\n // Image galleries that do not work.\n '.gallery',\n\n 'aside',\n '.sidebar',\n ],\n },\n};\n","export const WwwNdtvComExtractor = {\n domain: 'www.ndtv.com',\n\n title: {\n selectors: [['meta[name=\"og:title\"]', 'value'], 'h1.entry-title'],\n },\n\n author: {\n selectors: ['span[itemprop=\"author\"] span[itemprop=\"name\"]'],\n },\n\n date_published: {\n selectors: [['span[itemprop=\"dateModified\"]', 'content']],\n },\n\n dek: {\n selectors: ['h2'],\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']],\n },\n\n content: {\n selectors: ['div[itemprop=\"articleBody\"]'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n // This site puts a dateline in a 'b' above the first paragraph, and then somehow\n // blends it into the first paragraph with CSS. This transform moves the dateline\n // to the first paragraph.\n '.place_cont': $node => {\n if (!$node.parents('p').length) {\n const nextSibling = $node.next('p');\n if (nextSibling.length) {\n $node.remove();\n nextSibling.prepend($node);\n }\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.highlghts_Wdgt',\n '.ins_instory_dv_caption',\n 'input',\n '._world-wrapper .mt20',\n ],\n },\n};\n","export const SpektrumExtractor = {\n domain: 'www.spektrum.de',\n\n title: {\n selectors: ['.content__title'],\n },\n\n author: {\n selectors: ['.content__author__info__name'],\n },\n\n date_published: {\n selectors: ['.content__meta__date'],\n timezone: 'Europe/Berlin',\n },\n\n dek: {\n selectors: ['.content__intro'],\n },\n\n lead_image_url: {\n selectors: [\n // This is how the meta tag appears in the original source code.\n ['meta[name=\"og:image\"]', 'value'],\n // This is how the meta tag appears in the DOM in Chrome.\n // The selector is included here to make the code work within the browser as well.\n ['meta[property=\"og:image\"]', 'content'],\n // This is the image that is shown on the page.\n // It can be slightly cropped compared to the original in the meta tag.\n '.image__article__top img',\n ],\n },\n\n content: {\n selectors: ['article.content'],\n clean: [\n '.breadcrumbs',\n '.hide-for-print',\n 'aside',\n 'header h2',\n '.image__article__top',\n '.content__author',\n '.copyright',\n '.callout-box',\n ],\n },\n};\n","import mergeSupportedDomains from 'utils/merge-supported-domains';\nimport * as CustomExtractors from './custom/index';\n\nexport default Object.keys(CustomExtractors).reduce((acc, key) => {\n const extractor = CustomExtractors[key];\n return {\n ...acc,\n ...mergeSupportedDomains(extractor),\n };\n}, {});\n","// CLEAN AUTHOR CONSTANTS\nexport const CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n\n// CLEAN DEK CONSTANTS\nexport const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From 
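// The reduce above flattens every custom extractor into a single
// domain -> extractor lookup. mergeSupportedDomains fans one extractor
// out to all of its domains; assuming roughly this shape (the real util
// lives in utils/merge-supported-domains):
function mergeSupportedDomains(extractor) {
  const domains = [extractor.domain, ...(extractor.supportedDomains || [])];
  return domains.reduce((acc, domain) => ({ ...acc, [domain]: extractor }), {});
}
// GothamistComExtractor above would be registered under gothamist.com,
// chicagoist.com, laist.com, sfist.com, shanghaiist.com, and dcist.com.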
most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistently enough. Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\nexport const DEK_META_TAGS = [];\n\n// An ordered list of Selectors to find likely article deks. From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\nexport const DEK_SELECTORS = ['.entry-summary'];\n\n// CLEAN DATE PUBLISHED CONSTANTS\nexport const MS_DATE_STRING = /^\\d{13}$/i;\nexport const SEC_DATE_STRING = /^\\d{10}$/i;\nexport const CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nexport const TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nexport const TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nexport const TIME_NOW_STRING = /^\\s*(just|right)?\\s*now\\s*/i;\nconst timeUnits = [\n 'seconds?',\n 'minutes?',\n 'hours?',\n 'days?',\n 'weeks?',\n 'months?',\n 'years?',\n];\nconst allTimeUnits = timeUnits.join('|');\nexport const TIME_AGO_STRING = new RegExp(\n `(\\\\d+)\\\\s+(${allTimeUnits})\\\\s+ago`,\n 'i'\n);\nconst months = [\n 'jan',\n 'feb',\n 'mar',\n 'apr',\n 'may',\n 'jun',\n 'jul',\n 'aug',\n 'sep',\n 'oct',\n 'nov',\n 'dec',\n];\nconst allMonths = months.join('|');\nconst timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nconst timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nconst timestamp3 = '-[0-9]{3,4}$';\nexport const SPLIT_DATE_STRING = new RegExp(\n `(${timestamp1})|(${timestamp2})|(${timestamp3})|([0-9]{1,4})|(${allMonths})`,\n 'ig'\n);\n\n// 2016-11-22T08:57-500\n// Check if datetime string has an offset at the end\nexport const TIME_WITH_OFFSET_RE = /-\\d{3,4}$/;\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nexport const TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nexport const DOMAIN_ENDINGS_RE = new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n","import { normalizeSpaces } from 'utils/text';\nimport { CLEAN_AUTHOR_RE } from './constants';\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nexport default function cleanAuthor(author) {\n return normalizeSpaces(author.replace(CLEAN_AUTHOR_RE, '$2').trim());\n}\n","import validUrl from 'valid-url';\n\nexport default function clean(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n","import { stripTags } from 'utils/dom';\nimport { excerptContent, normalizeSpaces } from 'utils/text';\n\nimport { TEXT_LINK_RE } from './constants';\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return null if the dek wasn't good enough.\nexport default function cleanDek(dek, { $, excerpt }) {\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10))\n return null;\n\n const dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. 
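// CLEAN_AUTHOR_RE and cleanAuthor above reduce a byline to the bare
// name(s). A few illustrative inputs:
cleanAuthor('By David Smith'); // 'David Smith'
cleanAuthor('  posted by: David Smith '); // 'David Smith'
cleanAuthor('David Smith'); // 'David Smith' (no-op without a "by" prefix)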
If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return normalizeSpaces(dekText.trim());\n}\n","import moment from 'moment-timezone';\nimport parseFormat from 'moment-parseformat';\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nimport {\n MS_DATE_STRING,\n SEC_DATE_STRING,\n CLEAN_DATE_STRING_RE,\n SPLIT_DATE_STRING,\n TIME_AGO_STRING,\n TIME_NOW_STRING,\n TIME_MERIDIAN_SPACE_RE,\n TIME_MERIDIAN_DOTS_RE,\n TIME_WITH_OFFSET_RE,\n} from './constants';\n\nexport function cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || [])\n .join(' ')\n .replace(TIME_MERIDIAN_DOTS_RE, 'm')\n .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')\n .replace(CLEAN_DATE_STRING_RE, '$1')\n .trim();\n}\n\nexport function createDate(dateString, timezone, format) {\n if (TIME_WITH_OFFSET_RE.test(dateString)) {\n return moment(new Date(dateString));\n }\n\n if (TIME_AGO_STRING.test(dateString)) {\n const fragments = TIME_AGO_STRING.exec(dateString);\n return moment().subtract(fragments[1], fragments[2]);\n }\n\n if (TIME_NOW_STRING.test(dateString)) {\n return moment();\n }\n\n return timezone\n ? moment.tz(dateString, format || parseFormat(dateString), timezone)\n : moment(dateString, format || parseFormat(dateString));\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return null if we fail.\nexport default function cleanDatePublished(\n dateString,\n { timezone, format } = {}\n) {\n // If string is in milliseconds or seconds, convert to int and return\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n return new Date(parseInt(dateString, 10)).toISOString();\n }\n\n let date = createDate(dateString, timezone, format);\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = createDate(dateString, timezone, format);\n }\n\n return date.isValid() ? date.toISOString() : null;\n}\n","import {\n cleanAttributes,\n cleanHeaders,\n cleanHOnes,\n cleanImages,\n cleanTags,\n removeEmpty,\n rewriteTopLevel,\n markToKeep,\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom';\n\n// Clean our article content, returning a new, cleaned node.\nexport default function extractCleanNode(\n article,\n { $, cleanConditionally = true, title = '', url = '', defaultCleaner = true }\n) {\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel(article, $);\n\n // Drop small images and spacer images\n // Only do this if defaultCleaner is set to true;\n // this can sometimes be too aggressive.\n if (defaultCleaner) cleanImages(article, $);\n\n // Make links absolute\n makeLinksAbsolute(article, $, url);\n\n // Mark elements to keep that would normally be removed.\n // E.g., stripJunkTags will remove iframes, so we're going to mark\n // YouTube/Vimeo videos as elements we want to keep.\n markToKeep(article, $, url);\n\n // Drop certain tags like <title>, etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. 
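// cleanDatePublished above funnels several date shapes into ISO strings.
// Illustrative calls (the relative forms depend on the current clock, so
// treat those results as shapes rather than exact values):
cleanDatePublished('1475370804000'); // 13-digit ms -> '2016-10-02T01:13:24.000Z'
cleanDatePublished('2 hours ago'); // TIME_AGO_STRING -> ISO string, two hours back
cleanDatePublished('just now'); // TIME_NOW_STRING -> current time as ISO
cleanDatePublished('not a date'); // unparseable -> null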
Otherwise, turn 'em into H2s.\n cleanHOnes(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes(article, $);\n\n return article;\n}\n","import { stripTags } from 'utils/dom';\nimport { normalizeSpaces } from 'utils/text';\n\nimport { TITLE_SPLITTERS_RE } from './constants';\nimport { resolveSplitTitle } from './index';\n\nexport default function cleanTitle(title, { url, $ }) {\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n const h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return normalizeSpaces(stripTags(title, $).trim());\n}\n","import URL from 'url';\nimport wuzzy from 'wuzzy';\n\nimport { TITLE_SPLITTERS_RE, DOMAIN_ENDINGS_RE } from './constants';\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n const termCounts = splitTitle.reduce((acc, titleText) => {\n acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n const [maxTerm, termCount] = Reflect.ownKeys(termCounts).reduce(\n (acc, key) => {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n },\n [0, 0]\n );\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n const splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n const longestEnd = splitEnds.reduce(\n (acc, end) => (acc.length > end.length ? acc : end),\n ''\n );\n\n if (longestEnd.length > 10) {\n return longestEnd;\n }\n\n return text;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. 
Not the end of the world if it doesn't strip right.\n const { host } = URL.parse(url);\n const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n const startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n const endSlug = splitTitle\n .slice(-1)[0]\n .toLowerCase()\n .replace(' ', '');\n const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nexport default function resolveSplitTitle(title, url = '') {\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n const splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n let newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n","import cleanAuthor from './author';\nimport cleanImage from './lead-image-url';\nimport cleanDek from './dek';\nimport cleanDatePublished from './date-published';\nimport cleanContent from './content';\nimport cleanTitle from './title';\n\nconst Cleaners = {\n author: cleanAuthor,\n lead_image_url: cleanImage,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: cleanContent,\n title: cleanTitle,\n};\n\nexport default Cleaners;\n\nexport { cleanAuthor };\nexport { cleanImage };\nexport { cleanDek };\nexport { cleanDatePublished };\nexport { cleanContent };\nexport { cleanTitle };\nexport { default as resolveSplitTitle } from './resolve-split-title';\n","import { stripUnlikelyCandidates, convertToParagraphs } from 'utils/dom';\n\nimport { scoreContent, findTopCandidate } from './scoring';\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nexport default function extractBestNode($, opts) {\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs($);\n $ = scoreContent($, opts.weightNodes);\n const $topCandidate = findTopCandidate($);\n\n return $topCandidate;\n}\n","import cheerio from 'cheerio';\n\nimport { nodeIsSufficient } from 'utils/dom';\nimport { cleanContent } from 'cleaners';\nimport { normalizeSpaces } from 'utils/text';\n\nimport extractBestNode from './extract-best-node';\n\nconst GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true,\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. 
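// resolveSplitTitle above decides which segment of a splitter-laden title
// to keep. Two illustrative calls, assuming the helpers behave as defined
// above:
resolveSplitTitle('The Best Gadgets on Earth : Bits : Blogs : NYTimes.com');
// -> 'The Best Gadgets on Earth' (the longest breadcrumb end wins)
resolveSplitTitle('Tech News - NYTimes.com', 'http://nytimes.com');
// -> 'Tech News' (trailing segment fuzzy-matches the naked domain)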
If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. Things like forms, ads, etc.\n extract({ $, html, title, url }, opts) {\n opts = { ...this.defaultOpts, ...opts };\n\n $ = $ || cheerio.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n let node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n // eslint-disable-next-line no-restricted-syntax\n for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {\n opts[key] = false;\n $ = cheerio.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n\n return this.cleanAndReturnNode(node, $);\n },\n\n // Get node given current options\n getContentNode($, title, url, opts) {\n return cleanContent(extractBestNode($, opts), {\n $,\n cleanConditionally: opts.cleanConditionally,\n title,\n url,\n });\n },\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n },\n};\n\nexport default GenericContentExtractor;\n","// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n 'tweetmeme-title',\n 'dc.title',\n 'rbtitle',\n 'headline',\n 'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nexport const WEAK_TITLE_META_TAGS = ['og:title'];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than <meta name=\"title\" />.\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n 'tweetmeme-title',\n 'dc.title',\n 'rbtitle',\n 'headline',\n 'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into Facebook!\nexport const WEAK_TITLE_META_TAGS = ['og:title'];\n\n// An ordered list of CSS selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note: these are standard CSS selectors, matched with cheerio.\nexport const STRONG_TITLE_SELECTORS = [\n '.hentry .entry-title',\n 'h1#articleHeader',\n 'h1.articleHeader',\n 'h1.article',\n '.instapaper_title',\n '#meebo-title',\n];\n\nexport const WEAK_TITLE_SELECTORS = [\n 'article h1',\n '#entry-title',\n '.entry-title',\n '#entryTitle',\n '#entrytitle',\n '.entryTitle',\n '.entrytitle',\n '#articleTitle',\n '.articleTitle',\n 'post post-title',\n 'h1.title',\n 'h2.article',\n 'h1',\n 'html head title',\n 'title',\n];\n","import { cleanTitle } from 'cleaners';\nimport { extractFromMeta, extractFromSelectors } from 'utils/dom';\n\nimport {\n STRONG_TITLE_META_TAGS,\n WEAK_TITLE_META_TAGS,\n STRONG_TITLE_SELECTORS,\n WEAK_TITLE_SELECTORS,\n} from './constants';\n\nconst GenericTitleExtractor = {\n extract({ $, url, metaCache }) {\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n let title;\n\n title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors($, STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Last, look for weaker selectors that may match.\n title = extractFromSelectors($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // If no matches, return an empty string\n return '';\n },\n};\n\nexport default GenericTitleExtractor;\n","
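A sketch of the strong-meta, strong-selector, weak-meta, weak-selector cascade (markup hypothetical; metaCache would normally list the meta tag names present in the document):

import cheerio from 'cheerio';
import GenericTitleExtractor from 'extractors/generic/title/extractor';

const $ = cheerio.load(
  '<head><title>Example Site | A Post</title></head>' +
    '<body><h1 class="entry-title">A Post</h1></body>'
);
// With no cached meta names, extraction falls through to the selector
// lists and should land on '.entry-title':
const title = GenericTitleExtractor.extract({ $, url: '', metaCache: [] });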
// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nexport const AUTHOR_META_TAGS = [\n 'byl',\n 'clmst',\n 'dc.author',\n 'dcsext.author',\n 'dc.creator',\n 'rbauthors',\n 'authors',\n];\n\nexport const AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of CSS selectors to find likely article authors. From\n// most explicit to least explicit.\nexport const AUTHOR_SELECTORS = [\n '.entry .entry-author',\n '.author.vcard .fn',\n '.author .vcard .fn',\n '.byline.vcard .fn',\n '.byline .vcard .fn',\n '.byline .by .author',\n '.byline .by',\n '.byline .author',\n '.post-author.vcard',\n '.post-author .vcard',\n 'a[rel=author]',\n '#by_author',\n '.by_author',\n '#entryAuthor',\n '.entryAuthor',\n '.byline a[href*=author]',\n '#author .authorname',\n '.author .authorname',\n '#author',\n '.author',\n '.articleauthor',\n '.ArticleAuthor',\n '.byline',\n];\n\n// An ordered list of selectors to find likely article authors, paired\n// with a regular expression the matched content must satisfy.\nconst bylineRe = /^[\\n\\s]*By/i;\nexport const BYLINE_SELECTORS_RE = [\n ['#byline', bylineRe],\n ['.byline', bylineRe],\n];\n","import { cleanAuthor } from 'cleaners';\nimport { extractFromMeta, extractFromSelectors } from 'utils/dom';\n\nimport {\n AUTHOR_META_TAGS,\n AUTHOR_MAX_LENGTH,\n AUTHOR_SELECTORS,\n BYLINE_SELECTORS_RE,\n} from './constants';\n\nconst GenericAuthorExtractor = {\n extract({ $, metaCache }) {\n let author;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors for potential authors.\n author = extractFromSelectors($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression based selectors for\n // potential authors.\n // eslint-disable-next-line no-restricted-syntax\n for (const [selector, regex] of BYLINE_SELECTORS_RE) {\n const node = $(selector);\n if (node.length === 1) {\n const text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n\n return null;\n },\n};\n\nexport default GenericAuthorExtractor;\n","
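A sketch of the fallbacks above (markup hypothetical): '.byline' appears in AUTHOR_SELECTORS, and cleanAuthor is expected to strip the leading "By":

import cheerio from 'cheerio';
import GenericAuthorExtractor from 'extractors/generic/author/extractor';

const $ = cheerio.load('<div class="byline">By Jane Doe</div>');
const author = GenericAuthorExtractor.extract({ $, metaCache: [] });
// => 'Jane Doe'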
// An ordered list of meta tag names that denote\n// likely date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const DATE_PUBLISHED_META_TAGS = [\n 'article:published_time',\n 'displaydate',\n 'dc.date',\n 'dc.date.issued',\n 'rbpubdate',\n 'publish_date',\n 'pub_date',\n 'pagedate',\n 'pubdate',\n 'revision_date',\n 'doc_date',\n 'date_created',\n 'content_create_date',\n 'lastmodified',\n 'created',\n 'date',\n];\n\n// An ordered list of CSS selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nexport const DATE_PUBLISHED_SELECTORS = [\n '.hentry .dtstamp.published',\n '.hentry .published',\n '.hentry .dtstamp.updated',\n '.hentry .updated',\n '.single .published',\n '.meta .published',\n '.meta .postDate',\n '.entry-date',\n '.byline .date',\n '.postmetadata .date',\n '.article_datetime',\n '.date-header',\n '.story-date',\n '.dateStamp',\n '#story .datetime',\n '.dateline',\n '.pubdate',\n];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. These should always have the first\n// capture group be a date string that the date-published cleaner can\n// parse.\nconst abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nexport const DATE_PUBLISHED_URL_RES = [\n new RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n new RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n new RegExp(`/(20\\\\d{2}/${abbrevMonthsStr}/[0-3]\\\\d)/`, 'i'),\n];\n","import { cleanDatePublished } from 'cleaners';\nimport { extractFromMeta, extractFromSelectors } from 'utils/dom';\nimport { extractFromUrl } from 'utils/text';\n\nimport {\n DATE_PUBLISHED_META_TAGS,\n DATE_PUBLISHED_SELECTORS,\n DATE_PUBLISHED_URL_RES,\n} from './constants';\n\nconst GenericDatePublishedExtractor = {\n extract({ $, url, metaCache }) {\n let datePublished;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta(\n $,\n DATE_PUBLISHED_META_TAGS,\n metaCache,\n false\n );\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors for potential\n // date_published's.\n datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a date-like string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n },\n};\n\nexport default GenericDatePublishedExtractor;\n","// Currently there is only one selector for\n// deks. We should simply return null here\n// until we have a more robust generic option.\nconst GenericDekExtractor = {\n extract() {\n return null;\n },\n};\n\nexport default GenericDekExtractor;\n","// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const LEAD_IMAGE_URL_META_TAGS = [\n 'og:image',\n 'twitter:image',\n 'image_src',\n];\n\nexport const LEAD_IMAGE_URL_SELECTORS = ['link[rel=image_src]'];\n\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS = [\n 'upload',\n 'wp-content',\n 'large',\n 'photo',\n 'wp-image',\n];\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(\n POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'),\n 'i'\n);\n\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS = [\n 'spacer',\n 'sprite',\n 'blank',\n 'throbber',\n 'gradient',\n 'tile',\n 'bg',\n 'background',\n 'icon',\n 'social',\n 'header',\n 'hdr',\n 'advert',\n 'spinner',\n 'loader',\n 'loading',\n 'default',\n 'rating',\n 'share',\n 'facebook',\n 'twitter',\n 'theme',\n 'promo',\n 'ads',\n 'wp-includes',\n];\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(\n NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'),\n 'i'\n);\n\nexport const GIF_RE = /\\.gif(\\?.*)?$/i;\nexport const JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n","import {\n POSITIVE_LEAD_IMAGE_URL_HINTS_RE,\n NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,\n GIF_RE,\n JPG_RE,\n} from './constants';\n\nimport { PHOTO_HINTS_RE } from '../content/scoring/constants';\n\nfunction getSig($node) {\n return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;\n}\n\n// Scores image urls based on a variety of heuristics.\nexport function scoreImageUrl(url) {\n url = url.trim();\n let score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if 
(NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nexport function scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nexport function scoreByParents($img) {\n let score = 0;\n const $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n const $parent = $img.parent();\n let $gParent;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach($node => {\n if (PHOTO_HINTS_RE.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// caption. Bonus if so.\nexport function scoreBySibling($img) {\n let score = 0;\n const $sibling = $img.next();\n const sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName.toLowerCase() === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nexport function scoreByDimensions($img) {\n let score = 0;\n\n const width = parseFloat($img.attr('width'));\n const height = parseFloat($img.attr('height'));\n const src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n const area = width * height;\n if (area < 5000) {\n // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nexport function scoreByPosition($imgs, index) {\n return $imgs.length / 2 - index;\n}\n","import { extractFromMeta } from 'utils/dom';\nimport { cleanImage } from 'cleaners';\n\nimport {\n LEAD_IMAGE_URL_META_TAGS,\n LEAD_IMAGE_URL_SELECTORS,\n} from './constants';\n\nimport {\n scoreImageUrl,\n scoreAttr,\n scoreByParents,\n scoreBySibling,\n scoreByDimensions,\n scoreByPosition,\n} from './score-image';\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
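A quick sketch of the URL heuristics implemented above (URLs hypothetical; the import path assumes the bundle's resolver):

import { scoreImageUrl } from 'extractors/generic/lead-image-url/score-image';

// 'wp-content' and 'upload' are positive hints, and .jpg earns a bonus:
scoreImageUrl('http://example.com/wp-content/uploads/photo.jpg'); // > 0
// 'sprite' and 'icon' are negative hints, and .gif is penalized:
scoreImageUrl('http://example.com/img/icon-sprite.gif'); // < 0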
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nconst GenericLeadImageUrlExtractor = {\n extract({ $, content, metaCache, html }) {\n let cleanUrl;\n if (!$.browser && $('head').length === 0) {\n $('*')\n .first()\n .prepend(html);\n }\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n const imageUrl = extractFromMeta(\n $,\n LEAD_IMAGE_URL_META_TAGS,\n metaCache,\n false\n );\n\n if (imageUrl) {\n cleanUrl = cleanImage(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n const $content = $(content);\n const imgs = $('img', $content).toArray();\n const imgScores = {};\n\n imgs.forEach((img, index) => {\n const $img = $(img);\n const src = $img.attr('src');\n\n if (!src) return;\n\n let score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n const [topUrl, topScore] = Reflect.ownKeys(imgScores).reduce(\n (acc, key) => (imgScores[key] > acc[1] ? [key, imgScores[key]] : acc),\n [null, 0]\n );\n\n if (topScore > 0) {\n cleanUrl = cleanImage(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like <link rel=\"image_src\" />.\n // eslint-disable-next-line no-restricted-syntax\n for (const selector of LEAD_IMAGE_URL_SELECTORS) {\n const $node = $(selector).first();\n const src = $node.attr('src');\n if (src) {\n cleanUrl = cleanImage(src);\n if (cleanUrl) return cleanUrl;\n }\n\n const href = $node.attr('href');\n if (href) {\n cleanUrl = cleanImage(href);\n if (cleanUrl) return cleanUrl;\n }\n\n const value = $node.attr('value');\n if (value) {\n cleanUrl = cleanImage(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n\n return null;\n },\n};\n\nexport default GenericLeadImageUrlExtractor;\n","import difflib from 'difflib';\n\nexport default function scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n const similarity = new difflib.SequenceMatcher(\n null,\n articleUrl,\n href\n ).ratio();\n // Subtract 0.2 from diffPercent when calculating the modifier,\n // which means that anything less than 20% different earns a\n // bonus instead. Ex:\n // 10% different = +25 points\n // 20% different = 0 points\n // 30% different = -25 points\n const diffPercent = 1.0 - similarity;\n const diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n","import { IS_DIGIT_RE } from 'utils/text/constants';\n\nexport default function scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. 
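Worked numbers for the similarity modifier above, following the code's -(250 * (diffPercent - 0.2)) line (URLs hypothetical; the utils index path is an assumption):

import { scoreSimilarity } from 'extractors/generic/next-page-url/scoring/utils';

// 90% similar -> diffPercent 0.1 -> modifier +25
// 80% similar -> diffPercent 0.2 -> modifier 0
// 70% similar -> diffPercent 0.3 -> modifier -25
// Only applied when the running score is already positive:
scoreSimilarity(10, 'http://example.com/story/1', 'http://example.com/story/2');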
This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n let score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n const linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n","export default function scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n","export const DIGIT_RE = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(\n EXTRANEOUS_LINK_HINTS.join('|'),\n 'i'\n);\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nexport const NEXT_LINK_TEXT_RE = new RegExp(\n '(next|weiter|continue|>([^|]|$)|»([^|]|$))',\n 'i'\n);\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n","import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { range } from 'utils';\nimport {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PAGE_RE,\n} from 'utils/dom/constants';\nimport { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nfunction makeSig($link) {\n return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreByParents($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n let $parent = $link.parent();\n let positiveMatch = false;\n let negativeMatch = false;\n let score = 0;\n\n Array.from(range(0, 4)).forEach(() => {\n if ($parent.length === 0) {\n return;\n }\n\n const parentData = makeSig($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. 
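A sketch of how the link-text regexes above behave (sample strings hypothetical):

import {
  NEXT_LINK_TEXT_RE,
  PREV_LINK_TEXT_RE,
} from 'extractors/generic/next-page-url/scoring/constants';

NEXT_LINK_TEXT_RE.test('weiter'); // true ("weiter" is German for "next")
NEXT_LINK_TEXT_RE.test('»|'); // false ('»|' conventionally means last page)
PREV_LINK_TEXT_RE.test('« earlier'); // true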
Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. Give a penalty.\n if (\n !negativeMatch &&\n NEGATIVE_SCORE_RE.test(parentData) &&\n EXTRANEOUS_LINK_HINTS_RE.test(parentData)\n ) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n","import { PREV_LINK_TEXT_RE } from '../constants';\n\nexport default function scorePrevLink(linkData) {\n // If the link has something like \"previous\", it's definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport { DIGIT_RE, EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function shouldScore(\n href,\n articleUrl,\n baseUrl,\n parsedUrl,\n linkText,\n previousUrls\n) {\n // skip if we've already fetched this url\n if (previousUrls.find(url => href === url) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n const { hostname } = parsedUrl;\n const { hostname: linkHost } = URL.parse(href);\n\n // Domain mismatch.\n if (linkHost !== hostname) {\n return false;\n }\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n const fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n","export default function scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { NEXT_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n","
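A sketch of the first-pass filter above (all values hypothetical; the utils index path is an assumption):

import URL from 'url';
import { shouldScore } from 'extractors/generic/next-page-url/scoring/utils';

const articleUrl = 'http://example.com/story/1';
shouldScore(
  'http://example.com/story/2', // candidate href
  articleUrl,
  'http://example.com/story', // baseUrl
  URL.parse(articleUrl),
  '2', // link text
  [] // previousUrls
); // true: same host, contains a digit, short text, no extraneous hints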
import { NEXT_LINK_TEXT_RE, CAP_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE.test(linkData)) {\n // A link like \"last\" is fine on its own, but if it also looks\n // like a \"next\" link (e.g. \"next >|\"), it almost certainly\n // points at the last page rather than the next one. Penalize.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport { getAttrs, isWordpress } from 'utils/dom';\nimport { removeAnchor, pageNumFromUrl } from 'utils/text';\n\nimport {\n scoreSimilarity,\n scoreLinkText,\n scorePageInLink,\n scoreExtraneousLinks,\n scoreByParents,\n scorePrevLink,\n shouldScore,\n scoreBaseUrl,\n scoreCapLinks,\n scoreNextLinkText,\n} from './utils';\n\nexport function makeBaseRegex(baseUrl) {\n return new RegExp(`^${baseUrl}`, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr(\n 'id'\n ) || ''}`;\n}\n\nexport default function scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls = [],\n}) {\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n const baseRegex = makeBaseRegex(baseUrl);\n const isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n const scoredPages = links.reduce((possiblePages, link) => {\n // Remove any anchor data; we don't do a good job of\n // standardizing URLs (it's hard), so we do some checking with\n // and without a trailing slash\n const attrs = getAttrs(link);\n\n // if href is undefined, return\n if (!attrs.href) return possiblePages;\n\n const href = removeAnchor(attrs.href);\n const $link = $(link);\n const linkText = $link.text();\n\n if (\n !shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)\n ) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText,\n href,\n };\n } else {\n possiblePages[href].linkText = `${\n possiblePages[href].linkText\n }|${linkText}`;\n }\n\n const possiblePage = possiblePages[href];\n const linkData = makeSig($link, linkText);\n const pageNum = pageNumFromUrl(href);\n\n let score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return Reflect.ownKeys(scoredPages).length === 0 ? 
null : scoredPages;\n}\n","import URL from 'url';\n\nimport { articleBaseUrl, removeAnchor } from 'utils/text';\nimport scoreLinks from './scoring/score-links';\n\n// Looks for and returns next page url\n// for multi-page articles\nconst GenericNextPageUrlExtractor = {\n extract({ $, url, parsedUrl, previousUrls = [] }) {\n parsedUrl = parsedUrl || URL.parse(url);\n\n const articleUrl = removeAnchor(url);\n const baseUrl = articleBaseUrl(url, parsedUrl);\n\n const links = $('a[href]').toArray();\n\n const scoredLinks = scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls,\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n const topPage = Reflect.ownKeys(scoredLinks).reduce(\n (acc, link) => {\n const scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? scoredLink : acc;\n },\n { score: -100 }\n );\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n },\n};\n\nexport default GenericNextPageUrlExtractor;\n","export const CANONICAL_META_SELECTORS = ['og:url'];\n","import URL from 'url';\nimport { extractFromMeta } from 'utils/dom';\n\nimport { CANONICAL_META_SELECTORS } from './constants';\n\nfunction parseDomain(url) {\n const parsedUrl = URL.parse(url);\n const { hostname } = parsedUrl;\n return hostname;\n}\n\nfunction result(url) {\n return {\n url,\n domain: parseDomain(url),\n };\n}\n\nconst GenericUrlExtractor = {\n extract({ $, url, metaCache }) {\n const $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n const href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n const metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n },\n};\n\nexport default GenericUrlExtractor;\n","export const EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];\n","import ellipsize from 'ellipsize';\n\nimport { extractFromMeta, stripTags } from 'utils/dom';\n\nimport { EXCERPT_META_SELECTORS } from './constants';\n\nexport function clean(content, $, maxLength = 200) {\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nconst GenericExcerptExtractor = {\n extract({ $, content, metaCache }) {\n const excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n const maxLength = 200;\n const shortContent = content.slice(0, maxLength * 5);\n return clean($(shortContent).text(), $, maxLength);\n },\n};\n\nexport default GenericExcerptExtractor;\n","import cheerio from 'cheerio';\n\nimport { normalizeSpaces } from 'utils/text';\n\nconst GenericWordCountExtractor = {\n extract({ content }) {\n const $ = cheerio.load(content);\n const $content = $('div').first();\n\n const text = normalizeSpaces($content.text());\n return text.split(/\\s/).length;\n },\n};\n\nexport default GenericWordCountExtractor;\n","import cheerio from 'cheerio';\nimport stringDirection from 'string-direction';\n\nimport GenericContentExtractor from './content/extractor';\nimport GenericTitleExtractor from './title/extractor';\nimport GenericAuthorExtractor from './author/extractor';\nimport GenericDatePublishedExtractor from 
'./date-published/extractor';\nimport GenericDekExtractor from './dek/extractor';\nimport GenericLeadImageUrlExtractor from './lead-image-url/extractor';\nimport GenericNextPageUrlExtractor from './next-page-url/extractor';\nimport GenericUrlExtractor from './url/extractor';\nimport GenericExcerptExtractor from './excerpt/extractor';\nimport GenericWordCountExtractor from './word-count/extractor';\n\nconst GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: ({ title }) => stringDirection.getDirection(title),\n\n extract(options) {\n const { html, $ } = options;\n\n if (html && !$) {\n const loaded = cheerio.load(html);\n options.$ = loaded;\n }\n\n const title = this.title(options);\n const date_published = this.date_published(options);\n const author = this.author(options);\n const content = this.content({ ...options, title });\n const lead_image_url = this.lead_image_url({ ...options, content });\n const dek = this.dek({ ...options, content });\n const next_page_url = this.next_page_url(options);\n const excerpt = this.excerpt({ ...options, content });\n const word_count = this.word_count({ ...options, content });\n const direction = this.direction({ title });\n const { url, domain } = this.url_and_domain(options);\n\n return {\n title,\n author,\n date_published: date_published || null,\n dek,\n lead_image_url,\n content,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default GenericExtractor;\n","import { MediumExtractor, BloggerExtractor } from './custom';\n\nconst Detectors = {\n 'meta[name=\"al:ios:app_name\"][value=\"Medium\"]': MediumExtractor,\n 'meta[name=\"generator\"][value=\"blogger\"]': BloggerExtractor,\n};\n\nexport default function detectByHtml($) {\n const selector = Reflect.ownKeys(Detectors).find(s => $(s).length > 0);\n\n return Detectors[selector];\n}\n","import URL from 'url';\n\nimport Extractors from './all';\nimport GenericExtractor from './generic';\nimport detectByHtml from './detect-by-html';\nimport { apiExtractors } from './add-extractor';\n\nexport default function getExtractor(url, parsedUrl, $) {\n parsedUrl = parsedUrl || URL.parse(url);\n const { hostname } = parsedUrl;\n const baseDomain = hostname\n .split('.')\n .slice(-2)\n .join('.');\n\n return (\n apiExtractors[hostname] ||\n apiExtractors[baseDomain] ||\n Extractors[hostname] ||\n Extractors[baseDomain] ||\n detectByHtml($) ||\n GenericExtractor\n );\n}\n","import Cleaners from 'cleaners';\nimport { convertNodeTo, makeLinksAbsolute } from 'utils/dom';\nimport GenericExtractor from './generic';\n\n// Remove elements by an array of selectors\nexport function cleanBySelectors($content, $, { clean }) {\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nexport function transformElements($content, $, { transforms }) {\n if (!transforms) return $content;\n\n Reflect.ownKeys(transforms).forEach(key => {\n const $matches = $(key, 
$content);\n const value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each((index, node) => {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each((index, node) => {\n const result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors, extractHtml, allowMultiple) {\n return selectors.find(selector => {\n if (Array.isArray(selector)) {\n if (extractHtml) {\n return selector.reduce((acc, s) => acc && $(s).length > 0, true);\n }\n\n const [s, attr] = selector;\n return (\n (allowMultiple || (!allowMultiple && $(s).length === 1)) &&\n $(s).attr(attr) &&\n $(s)\n .attr(attr)\n .trim() !== ''\n );\n }\n\n return (\n (allowMultiple || (!allowMultiple && $(selector).length === 1)) &&\n $(selector)\n .text()\n .trim() !== ''\n );\n });\n}\n\nexport function select(opts) {\n const { $, type, extractionOpts, extractHtml = false } = opts;\n // Skip if there's no extraction for this type\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n const { selectors, defaultCleaner = true, allowMultiple } = extractionOpts;\n\n const matchingSelector = findMatchingSelector(\n $,\n selectors,\n extractHtml,\n allowMultiple\n );\n\n if (!matchingSelector) return null;\n\n function transformAndClean($node) {\n makeLinksAbsolute($node, $, opts.url || '');\n cleanBySelectors($node, $, extractionOpts);\n transformElements($node, $, extractionOpts);\n return $node;\n }\n\n function selectHtml() {\n // If the selector type requests html as its return type,\n // transform and clean the element with provided selectors\n let $content;\n\n // If matching selector is an array, we're considering this a\n // multi-match selection, which allows the parser to choose several\n // selectors to include in the result. Note that all selectors in the\n // array must match in order for this selector to trigger\n if (Array.isArray(matchingSelector)) {\n $content = $(matchingSelector.join(','));\n const $wrapper = $('<div></div>');\n $content.each((_, element) => {\n $wrapper.append(element);\n });\n\n $content = $wrapper;\n } else {\n $content = $(matchingSelector);\n }\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($('<div></div>'));\n $content = $content.parent();\n $content = transformAndClean($content);\n if (Cleaners[type]) {\n Cleaners[type]($content, { ...opts, defaultCleaner });\n }\n\n if (allowMultiple) {\n return $content\n .children()\n .toArray()\n .map(el => $.html($(el)));\n }\n\n return $.html($content);\n }\n\n if (extractHtml) {\n return selectHtml(matchingSelector);\n }\n\n let $match;\n let result;\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n const [selector, attr, transform] = matchingSelector;\n $match = $(selector);\n $match = transformAndClean($match);\n result = $match.map((_, el) => {\n const item = $(el)\n .attr(attr)\n .trim();\n return transform ? 
transform(item) : item;\n });\n } else {\n $match = $(matchingSelector);\n $match = transformAndClean($match);\n result = $match.map((_, el) =>\n $(el)\n .text()\n .trim()\n );\n }\n\n result =\n Array.isArray(result.toArray()) && allowMultiple\n ? result.toArray()\n : result[0];\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner && Cleaners[type]) {\n return Cleaners[type](result, { ...opts, ...extractionOpts });\n }\n\n return result;\n}\n\nexport function selectExtendedTypes(extend, opts) {\n const results = {};\n Reflect.ownKeys(extend).forEach(t => {\n if (!results[t]) {\n results[t] = select({ ...opts, type: t, extractionOpts: extend[t] });\n }\n });\n return results;\n}\n\nfunction extractResult(opts) {\n const { type, extractor, fallback = true } = opts;\n\n const result = select({ ...opts, extractionOpts: extractor[type] });\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nconst RootExtractor = {\n extract(extractor = GenericExtractor, opts) {\n const { contentOnly, extractedTitle } = opts;\n // This is the generic extractor. Run its extract method\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = {\n ...opts,\n extractor,\n };\n\n if (contentOnly) {\n const content = extractResult({\n ...opts,\n type: 'content',\n extractHtml: true,\n title: extractedTitle,\n });\n return {\n content,\n };\n }\n const title = extractResult({ ...opts, type: 'title' });\n const date_published = extractResult({ ...opts, type: 'date_published' });\n const author = extractResult({ ...opts, type: 'author' });\n const next_page_url = extractResult({ ...opts, type: 'next_page_url' });\n const content = extractResult({\n ...opts,\n type: 'content',\n extractHtml: true,\n title,\n });\n const lead_image_url = extractResult({\n ...opts,\n type: 'lead_image_url',\n content,\n });\n const excerpt = extractResult({ ...opts, type: 'excerpt', content });\n const dek = extractResult({ ...opts, type: 'dek', content, excerpt });\n const word_count = extractResult({ ...opts, type: 'word_count', content });\n const direction = extractResult({ ...opts, type: 'direction', title });\n const { url, domain } = extractResult({\n ...opts,\n type: 'url_and_domain',\n }) || { url: null, domain: null };\n\n let extendedResults = {};\n if (extractor.extend) {\n extendedResults = selectExtendedTypes(extractor.extend, opts);\n }\n\n return {\n title,\n content,\n author,\n date_published,\n lead_image_url,\n dek,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n ...extendedResults,\n };\n },\n};\n\nexport default RootExtractor;\n","import { removeAnchor } from 'utils/text';\nimport RootExtractor from 'extractors/root-extractor';\nimport GenericExtractor from 'extractors/generic';\nimport Resource from 'resource';\n\nexport default async function collectAllPages({\n next_page_url,\n html,\n $,\n metaCache,\n result,\n Extractor,\n title,\n url,\n}) {\n // At this point, we've fetched just the first page\n let pages = 1;\n const previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n while (next_page_url && pages < 26) {\n pages += 1;\n // eslint-disable-next-line no-await-in-loop\n $ = await Resource.create(next_page_url);\n html = $.html();\n\n const 
extractorOpts = {\n url: next_page_url,\n html,\n $,\n metaCache,\n extractedTitle: title,\n previousUrls,\n };\n\n const nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n previousUrls.push(next_page_url);\n result = {\n ...result,\n content: `${result.content}