Skip to content

Commit ba438ba

Browse files
committed
Add options to independently allow or disallow matching of scheme, www, and top level domain urls
1 parent 3a019c2 commit ba438ba

File tree

6 files changed

+272
-56
lines changed

6 files changed

+272
-56
lines changed

README.md

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,29 @@ providing an Object as the second parameter to [Autolinker.link()](http://gregja
139139
4) Twitter links will have the CSS classes: "myLink myLink-twitter"<br />
140140
5) Hashtag links will have the CSS classes: "myLink myLink-hashtag"<br />
141141

142-
- [urls](http://gregjacobs.github.io/Autolinker.js/docs/#!/api/Autolinker-cfg-urls) : Boolean<br />
142+
- [urls](http://gregjacobs.github.io/Autolinker.js/docs/#!/api/Autolinker-cfg-urls) : Boolean/Object<br />
143143
`true` to have URLs auto-linked, `false` to skip auto-linking of URLs.
144-
Defaults to `true`.<br />
144+
Defaults to `true`.<br>
145+
146+
This option also accepts an Object form with 3 properties, to allow for more
147+
customization of what exactly gets linked. All default to `true`:
148+
149+
- schemeMatches (Boolean): `true` to match URLs found prefixed with a scheme,
150+
e.g. `http://google.com` or `other+scheme://google.com`. `false` to
151+
prevent these types of matches.
152+
- wwwMatches (Boolean): `true` to match urls found prefixed with `'www.'`,
153+
e.g. `www.google.com`. `false` to prevent these types of matches. Note
154+
that if the URL had a prefixed scheme, and `schemeMatches` is true, it
155+
will still be linked.
156+
- tldMatches (Boolean): `true` to match URLs with known top level domains (.com, .net,
157+
etc.) that are not prefixed with a scheme or `'www.'`. This option
158+
attempts to match anything that looks like a URL in the given text.
159+
Ex: `google.com`, `asdf.org/?page=1`, etc. `false` to prevent these types
160+
of matches.
161+
<br />
162+
163+
Example usage: `urls: { schemeMatches: true, wwwMatches: true, tldMatches: false }`
164+
145165
- [email](http://gregjacobs.github.io/Autolinker.js/docs/#!/api/Autolinker-cfg-email) : Boolean<br />
146166
`true` to have email addresses auto-linked, `false` to skip auto-linking of
147167
email addresses. Defaults to `true`.<br /><br />

src/Autolinker.js

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -113,44 +113,59 @@ var Autolinker = function( cfg ) {
113113
throw new Error( "invalid `hashtag` cfg - see docs" );
114114
}
115115

116-
// Normalize the `truncate` option
117-
var truncate = this.truncate = this.truncate || {};
118-
if( typeof truncate === 'number' ) {
119-
this.truncate = { length: truncate, location: 'end' };
120-
} else if( typeof truncate === 'object' ) {
121-
this.truncate.length = truncate.length || Number.POSITIVE_INFINITY;
122-
this.truncate.location = truncate.location || 'end';
123-
}
116+
// Normalize the configs
117+
this.urls = this.normalizeUrlsCfg( this.urls );
118+
this.truncate = this.normalizeTruncateCfg( this.truncate );
124119
};
125120

126121
Autolinker.prototype = {
127122
constructor : Autolinker, // fix constructor property
128123

129124
/**
130-
* @cfg {Boolean} urls
131-
*
132-
* `true` if miscellaneous URLs should be automatically linked, `false` if they should not be.
125+
* @cfg {Boolean/Object} urls
126+
*
127+
* `true` if URLs should be automatically linked, `false` if they should not
128+
* be.
129+
*
130+
* This option also accepts an Object form with 3 properties, to allow for
131+
* more customization of what exactly gets linked. All default to `true`:
132+
*
133+
* @param {Boolean} schemeMatches `true` to match URLs found prefixed with a
134+
* scheme, e.g. `http://google.com` or `other+scheme://google.com`.
135+
* `false` to prevent these types of matches.
136+
* @param {Boolean} wwwMatches `true` to match urls found prefixed with
137+
* `'www.'`, e.g. `www.google.com`. `false` to prevent these types of
138+
* matches. Note that if the URL had a prefixed scheme, and
139+
* `schemeMatches` is true, it will still be linked.
140+
* @param {Boolean} tldMatches `true` to match URLs with known top level
141+
* domains (.com, .net, etc.) that are not prefixed with a scheme or
142+
* `'www.'`. This option attempts to match anything that looks like a URL
143+
* in the given text. Ex: `google.com`, `asdf.org/?page=1`, etc. `false`
144+
* to prevent these types of matches.
133145
*/
134146
urls : true,
135147

136148
/**
137149
* @cfg {Boolean} email
138150
*
139-
* `true` if email addresses should be automatically linked, `false` if they should not be.
151+
* `true` if email addresses should be automatically linked, `false` if they
152+
* should not be.
140153
*/
141154
email : true,
142155

143156
/**
144157
* @cfg {Boolean} twitter
145158
*
146-
* `true` if Twitter handles ("@example") should be automatically linked, `false` if they should not be.
159+
* `true` if Twitter handles ("@example") should be automatically linked,
160+
* `false` if they should not be.
147161
*/
148162
twitter : true,
149163

150164
/**
151165
* @cfg {Boolean} phone
152166
*
153-
* `true` if Phone numbers ("(555)555-5555") should be automatically linked, `false` if they should not be.
167+
* `true` if Phone numbers ("(555)555-5555") should be automatically linked,
168+
* `false` if they should not be.
154169
*/
155170
phone: true,
156171

@@ -288,6 +303,49 @@ Autolinker.prototype = {
288303
*/
289304
tagBuilder : undefined,
290305

306+
307+
/**
308+
* Normalizes the {@link #urls} config into an Object with 3 properties:
309+
* `schemeMatches`, `wwwMatches`, and `tldMatches`, all Booleans.
310+
*
311+
* See {@link #urls} config for details.
312+
*
313+
* @private
314+
* @param {Boolean/Object} urls
315+
* @return {Object}
316+
*/
317+
normalizeUrlsCfg : function( urls ) {
318+
if( typeof urls === 'boolean' ) {
319+
return { schemeMatches: urls, wwwMatches: urls, tldMatches: urls };
320+
} else {
321+
return Autolinker.Util.defaults( urls || {}, { schemeMatches: true, wwwMatches: true, tldMatches: true } );
322+
}
323+
},
324+
325+
326+
/**
327+
* Normalizes the {@link #truncate} config into an Object with 2 properties:
328+
* `length` (Number), and `location` (String).
329+
*
330+
* See {@link #truncate} config for details.
331+
*
332+
* @private
333+
* @param {Number/Object} truncate
334+
* @return {Object}
335+
*/
336+
normalizeTruncateCfg : function( truncate ) {
337+
if( typeof truncate === 'number' ) {
338+
return { length: truncate, location: 'end' };
339+
340+
} else { // object, or undefined/null
341+
return Autolinker.Util.defaults( truncate || {}, {
342+
length : Number.POSITIVE_INFINITY,
343+
location : 'end'
344+
} );
345+
}
346+
},
347+
348+
291349
/**
292350
* Automatically links URLs, Email addresses, Phone numbers, Twitter
293351
* handles, and Hashtags found in the given chunk of HTML. Does not link

src/Util.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,25 @@ Autolinker.Util = {
4444
},
4545

4646

47+
/**
48+
* Assigns (shallow copies) the properties of `src` onto `dest`, if the
49+
* corresponding property on `dest` === `undefined`.
50+
*
51+
* @param {Object} dest The destination object.
52+
* @param {Object} src The source object.
53+
* @return {Object} The destination object (`dest`)
54+
*/
55+
defaults : function( dest, src ) {
56+
for( var prop in src ) {
57+
if( src.hasOwnProperty( prop ) && dest[ prop ] === undefined ) {
58+
dest[ prop ] = src[ prop ];
59+
}
60+
}
61+
62+
return dest;
63+
},
64+
65+
4766
/**
4867
* Extends `superclass` to create a new subclass, adding the `protoProps` to the new subclass's prototype.
4968
*

src/matchParser/MatchParser.js

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
1515

1616
/**
17-
* @cfg {Boolean} urls
17+
* @cfg {Object} urls
1818
* @inheritdoc Autolinker#urls
1919
*/
2020
urls : true,
@@ -78,26 +78,31 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
7878
* used to match protocol URLs with just a single word, like 'http://localhost',
7979
* where we won't double check that the domain name has at least one '.'
8080
* in it.
81-
* 7. A protocol-relative ('//') match for the case of a 'www.' prefixed
81+
* 7. Group that matches a 'www.' prefixed URL. This is only matched if the
82+
* 'www.' text was not prefixed by a scheme (i.e.: not prefixed by
83+
* 'http://', 'ftp:', etc.)
84+
* 8. A protocol-relative ('//') match for the case of a 'www.' prefixed
8285
* URL. Will be an empty string if it is not a protocol-relative match.
8386
* We need to know the character before the '//' in order to determine
8487
* if it is a valid match or the // was in a string we don't want to
8588
* auto-link.
86-
* 8. A protocol-relative ('//') match for the case of a known TLD prefixed
89+
* 9. Group that matches a known TLD (top level domain), when a scheme
90+
* or 'www.'-prefixed domain is not matched.
91+
* 10. A protocol-relative ('//') match for the case of a known TLD prefixed
8792
* URL. Will be an empty string if it is not a protocol-relative match.
8893
* See #6 for more info.
89-
* 9. Group that is used to determine if there is a phone number match.
90-
* 10. If there is a phone number match, and a '+' sign was included with
94+
* 11. Group that is used to determine if there is a phone number match.
95+
* 12. If there is a phone number match, and a '+' sign was included with
9196
* the phone number, this group will be populated with the '+' sign.
92-
* 11. Group that is used to determine if there is a Hashtag match
97+
* 13. Group that is used to determine if there is a Hashtag match
9398
* (i.e. \#someHashtag). Simply check for its existence to determine if
9499
* there is a Hashtag match. The next couple of capturing groups give
95100
* information about the Hashtag match.
96-
* 12. The whitespace character before the #sign in a Hashtag handle. This
101+
* 14. The whitespace character before the #sign in a Hashtag handle. This
97102
* is needed because there are no look-behinds in JS regular
98103
* expressions, and can be used to reconstruct the original string in a
99104
* replace().
100-
* 13. The Hashtag itself in a Hashtag match. If the match is
105+
* 15. The Hashtag itself in a Hashtag match. If the match is
101106
* '#someHashtag', the hashtag is 'someHashtag'.
102107
*/
103108
matcherRegex : (function() {
@@ -135,23 +140,23 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
135140

136141
'(', // *** Capturing group $5, which is used to match a URL
137142
'(?:', // parens to cover match for protocol (optional), and domain
138-
'(', // *** Capturing group $6, for a protocol-prefixed url (ex: http://google.com)
143+
'(', // *** Capturing group $6, for a scheme-prefixed url (ex: http://google.com)
139144
protocolRegex.source,
140145
domainNameRegex.source,
141146
')',
142147

143148
'|',
144149

145-
'(?:', // non-capturing paren for a 'www.' prefixed url (ex: www.google.com)
146-
'(.?//)?', // *** Capturing group $7 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
150+
'(', // *** Capturing group $7, for a 'www.' prefixed url (ex: www.google.com)
151+
'(.?//)?', // *** Capturing group $8 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
147152
wwwRegex.source,
148153
domainNameRegex.source,
149154
')',
150155

151156
'|',
152157

153-
'(?:', // non-capturing paren for known a TLD url (ex: google.com)
154-
'(.?//)?', // *** Capturing group $8 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
158+
'(', // *** Capturing group $9, for a known TLD url (ex: google.com)
159+
'(.?//)?', // *** Capturing group $10 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character
155160
domainNameRegex.source,
156161
tldRegex.source,
157162
')',
@@ -163,17 +168,17 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
163168
'|',
164169

165170
// this setup does not scale well for open extension :( Need to rethink design of autolinker...
166-
// *** Capturing group $9, which matches a (USA for now) phone number, and
167-
// *** Capturing group $10, which matches the '+' sign for international numbers, if it exists
171+
// *** Capturing group $11, which matches a (USA for now) phone number, and
172+
// *** Capturing group $12, which matches the '+' sign for international numbers, if it exists
168173
'(',
169174
phoneRegex.source,
170175
')',
171176

172177
'|',
173178

174-
'(', // *** Capturing group $11, which can be used to check for a Hashtag match. Use group $12 for the actual Hashtag though. $11 may be used to reconstruct the original string in a replace()
175-
// *** Capturing group $12, which matches the whitespace character before the '#' sign (needed because of no lookbehinds), and
176-
// *** Capturing group $13, which matches the actual Hashtag
179+
'(', // *** Capturing group $13, which can be used to check for a Hashtag match. Use group $15 for the actual Hashtag though. $13 may be used to reconstruct the original string in a replace()
180+
// *** Capturing group $14, which matches the whitespace character before the '#' sign (needed because of no lookbehinds), and
181+
// *** Capturing group $15, which matches the actual Hashtag
177182
hashtagRegex.source,
178183
')'
179184
].join( "" ), 'gi' );
@@ -230,8 +235,8 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
230235
replace : function( text, replaceFn, contextObj ) {
231236
var me = this; // for closure
232237

233-
return text.replace( this.matcherRegex, function( matchStr, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ) {
234-
var matchDescObj = me.processCandidateMatch( matchStr, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ); // "match description" object
238+
return text.replace( this.matcherRegex, function( matchStr/*, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15*/ ) {
239+
var matchDescObj = me.processCandidateMatch.apply( me, arguments ); // "match description" object
235240

236241
// Return out with no changes for match types that are disabled (url,
237242
// email, phone, etc.), or for matches that are invalid (false
@@ -271,12 +276,17 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
271276
* @param {String} emailAddressMatch The matched email address for an email
272277
* address match.
273278
* @param {String} urlMatch The matched URL string for a URL match.
274-
* @param {String} protocolUrlMatch The match URL string for a protocol
279+
* @param {String} schemeUrlMatch The matched URL string for a scheme-prefixed
275280
* match. Ex: 'http://yahoo.com'. This is used to match something like
276281
* 'http://localhost', where we won't double check that the domain name
277282
* has at least one '.' in it.
283+
* @param {String} wwwMatch The matched string of a 'www.'-prefixed URL that
284+
* was matched. This is only matched if the 'www.' text was not prefixed
285+
* by a scheme (i.e.: not prefixed by 'http://', 'ftp:', etc.).
278286
* @param {String} wwwProtocolRelativeMatch The '//' for a protocol-relative
279287
* match from a 'www' url, with the character that comes before the '//'.
288+
* @param {String} tldMatch The matched string of a known TLD (top level
289+
* domain), when a scheme or 'www.'-prefixed domain is not matched.
280290
* @param {String} tldProtocolRelativeMatch The '//' for a protocol-relative
281291
* match from a TLD (top level domain) match, with the character that
282292
* comes before the '//'.
@@ -308,8 +318,8 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
308318
*/
309319
processCandidateMatch : function(
310320
matchStr, twitterMatch, twitterHandlePrefixWhitespaceChar, twitterHandle,
311-
emailAddressMatch, urlMatch, protocolUrlMatch, wwwProtocolRelativeMatch,
312-
tldProtocolRelativeMatch, phoneMatch, phonePlusSignMatch, hashtagMatch,
321+
emailAddressMatch, urlMatch, schemeUrlMatch, wwwMatch, wwwProtocolRelativeMatch,
322+
tldMatch, tldProtocolRelativeMatch, phoneMatch, phonePlusSignMatch, hashtagMatch,
313323
hashtagPrefixWhitespaceChar, hashtag
314324
) {
315325
// Note: The `matchStr` variable will be fixed up to remove characters that are no longer needed (which will
@@ -319,19 +329,23 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
319329
match, // Will be an Autolinker.match.Match object
320330

321331
prefixStr = "", // A string to use to prefix the anchor tag that is created. This is needed for the Twitter and Hashtag matches.
322-
suffixStr = ""; // A string to suffix the anchor tag that is created. This is used if there is a trailing parenthesis that should not be auto-linked.
332+
suffixStr = "", // A string to suffix the anchor tag that is created. This is used if there is a trailing parenthesis that should not be auto-linked.
333+
334+
urls = this.urls; // the 'urls' config
323335

324336
// Return out with `null` for match types that are disabled (url, email,
325337
// twitter, hashtag), or for matches that are invalid (false positives
326338
// from the matcherRegex, which can't use look-behinds since they are
327339
// unavailable in JS).
328340
if(
329-
( urlMatch && !this.urls ) ||
341+
( schemeUrlMatch && !urls.schemeMatches ) ||
342+
( wwwMatch && !urls.wwwMatches ) ||
343+
( tldMatch && !urls.tldMatches ) ||
330344
( emailAddressMatch && !this.email ) ||
331345
( phoneMatch && !this.phone ) ||
332346
( twitterMatch && !this.twitter ) ||
333347
( hashtagMatch && !this.hashtag ) ||
334-
!this.matchValidator.isValidMatch( urlMatch, protocolUrlMatch, protocolRelativeMatch )
348+
!this.matchValidator.isValidMatch( urlMatch, schemeUrlMatch, protocolRelativeMatch )
335349
) {
336350
return null;
337351
}
@@ -344,7 +358,7 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
344358
suffixStr = ")"; // this will be added after the generated <a> tag
345359
} else {
346360
// Handle an invalid character after the TLD
347-
var pos = this.matchHasInvalidCharAfterTld( urlMatch, protocolUrlMatch );
361+
var pos = this.matchHasInvalidCharAfterTld( urlMatch, schemeUrlMatch );
348362
if( pos > -1 ) {
349363
suffixStr = matchStr.substr(pos); // this will be added after the generated <a> tag
350364
matchStr = matchStr.substr( 0, pos ); // remove the trailing invalid chars
@@ -396,7 +410,7 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
396410
match = new Autolinker.match.Url( {
397411
matchedText : matchStr,
398412
url : matchStr,
399-
protocolUrlMatch : !!protocolUrlMatch,
413+
protocolUrlMatch : !!schemeUrlMatch,
400414
protocolRelativeMatch : !!protocolRelativeMatch,
401415
stripPrefix : this.stripPrefix
402416
} );

0 commit comments

Comments
 (0)