Skip to content

Commit 4a8b50f

Browse files
committed
Merge pull request #105 from dgrad/enforceslashpath
Exclude invalid characters right after TLD in detected URLs
2 parents d4d3270 + 4ade3df commit 4a8b50f

File tree

2 files changed

+118
-10
lines changed

2 files changed

+118
-10
lines changed

src/matchParser/MatchParser.js

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,13 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
338338
if( this.matchHasUnbalancedClosingParen( matchStr ) ) {
339339
matchStr = matchStr.substr( 0, matchStr.length - 1 ); // remove the trailing ")"
340340
suffixStr = ")"; // this will be added after the generated <a> tag
341+
} else {
342+
// Handle an invalid character after the TLD
343+
var pos = this.matchHasInvalidCharAfterTld( urlMatch, protocolUrlMatch );
344+
if( pos > -1 ) {
345+
suffixStr = matchStr.substr(pos); // this will be added after the generated <a> tag
346+
matchStr = matchStr.substr( 0, pos ); // remove the trailing invalid chars
347+
}
341348
}
342349

343350
if( emailAddressMatch ) {
@@ -433,6 +440,48 @@ Autolinker.matchParser.MatchParser = Autolinker.Util.extend( Object, {
433440
}
434441

435442
return false;
443+
},
444+
445+
446+
/**
447+
* Find the index of an invalid character directly after the TLD of a URL.
448+
* Characters accepted after the TLD are ':', '/', '?' and '#' (as well as '.'
449+
* and letters). A scheme name such as 'http', if present, is skipped first.
450+
*
451+
* @private
452+
* @param {String} urlMatch The matched URL, if there was one. A falsy value
453+
* (Ex: an empty string) makes this method return -1 immediately.
454+
* @param {String} protocolUrlMatch The match URL string for a protocol
455+
* match. Ex: 'http://yahoo.com'. Only its truthiness is used here: when it
456+
* is truthy, the part of `urlMatch` before its first ':' is not examined.
457+
* Hosts that do not end in '.' + letters (Ex: 'localhost') are never flagged.
458+
* @return {Number} The index within the `urlMatch` argument of the invalid
459+
* character that follows the TLD, or -1 if no such character was found.
460+
*/
461+
matchHasInvalidCharAfterTld : function( urlMatch, protocolUrlMatch ) {
462+
if ( !urlMatch ) {
463+
return -1;
464+
}
465+
466+
var offset = 0;
467+
if ( protocolUrlMatch ) {
468+
// Drop the scheme name but remember its length, so the returned index
// still refers to the string that was passed in. The remaining '://' is
// consumed by the optional prefix group of the regex below.
offset = urlMatch.indexOf(':');
469+
urlMatch = urlMatch.slice(offset);
470+
}
471+
472+
// Optional prefix (at most one character followed by '//'), then a host made
// of letters, digits, '.' and '-' that ends in '.' + letters (the TLD).
var re = /^((.?\/\/)?[A-Za-z0-9\.\-]*[A-Za-z0-9\-]\.[A-Za-z]+)/;
473+
var res = re.exec( urlMatch );
474+
if ( res === null ) {
475+
return -1;
476+
}
477+
478+
// Advance past the prefix + host + TLD; `offset` now points at the first
// character after the TLD, and `urlMatch` holds the remainder from there.
offset += res[1].length;
479+
urlMatch = urlMatch.slice(res[1].length);
480+
// An empty remainder cannot match this pattern, so a URL that ends right
// after the TLD falls through to the -1 below.
if (/^[^.A-Za-z:\/?#]/.test(urlMatch)) {
481+
return offset;
482+
}
483+
484+
return -1;
436485
}
437486

438-
} );
487+
} );

tests/AutolinkerSpec.js

Lines changed: 68 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,29 @@ describe( "Autolinker", function() {
9696
} );
9797

9898

99-
it( "should not include the '?' char if it is at the end of the URL", function() {
100-
var result = autolinker.link( "Joe went to http://localhost:8000? today" );
101-
expect( result ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>? today' );
99+
it( "should not include [?!:,.;] chars if at the end of the URL", function() {
100+
var result1 = autolinker.link( "Joe went to http://localhost:8000? today" );
101+
expect( result1 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>? today' );
102+
var result2 = autolinker.link( "Joe went to http://localhost:8000! today" );
103+
expect( result2 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>! today' );
104+
var result3 = autolinker.link( "Joe went to http://localhost:8000: today" );
105+
expect( result3 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>: today' );
106+
var result4 = autolinker.link( "Joe went to http://localhost:8000, today" );
107+
expect( result4 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>, today' );
108+
var result5 = autolinker.link( "Joe went to http://localhost:8000. today" );
109+
expect( result5 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>. today' );
110+
var result6 = autolinker.link( "Joe went to http://localhost:8000; today" );
111+
expect( result6 ).toBe( 'Joe went to <a href="http://localhost:8000">localhost:8000</a>; today' );
112+
} );
113+
114+
115+
it( "should exclude invalid chars after TLD", function() {
116+
var result1 = autolinker.link( "Joe went to http://www.yahoo.com's today" );
117+
expect( result1 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>\'s today' );
118+
var result2 = autolinker.link( "Joe went to https://www.yahoo.com/foo's today" );
119+
expect( result2 ).toBe( 'Joe went to <a href="https://www.yahoo.com/foo\'s">yahoo.com/foo\'s</a> today' );
120+
var result3 = autolinker.link( "Joe went to http://www.yahoo.com's/foo today" );
121+
expect( result3 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>\'s/foo today' );
102122
} );
103123

104124

@@ -374,9 +394,29 @@ describe( "Autolinker", function() {
374394
} );
375395

376396

377-
it( "should not include the '?' char if it is at the end of the URL", function() {
378-
var result = autolinker.link( "Joe went to www.yahoo.com? today" );
379-
expect( result ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>? today' );
397+
it( "should not include [?!:,.;] chars if at the end of the URL", function() {
398+
var result1 = autolinker.link( "Joe went to www.yahoo.com? today" );
399+
expect( result1 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>? today' );
400+
var result2 = autolinker.link( "Joe went to www.yahoo.com! today" );
401+
expect( result2 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>! today' );
402+
var result3 = autolinker.link( "Joe went to www.yahoo.com: today" );
403+
expect( result3 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>: today' );
404+
var result4 = autolinker.link( "Joe went to www.yahoo.com, today" );
405+
expect( result4 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>, today' );
406+
var result5 = autolinker.link( "Joe went to www.yahoo.com. today" );
407+
expect( result5 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>. today' );
408+
var result6 = autolinker.link( "Joe went to www.yahoo.com; today" );
409+
expect( result6 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>; today' );
410+
} );
411+
412+
413+
it( "should exclude invalid chars after TLD", function() {
414+
var result1 = autolinker.link( "Joe went to www.yahoo.com's today" );
415+
expect( result1 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>\'s today' );
416+
var result2 = autolinker.link( "Joe went to www.yahoo.com/foo's today" );
417+
expect( result2 ).toBe( 'Joe went to <a href="http://www.yahoo.com/foo\'s">yahoo.com/foo\'s</a> today' );
418+
var result3 = autolinker.link( "Joe went to www.yahoo.com's/foo today" );
419+
expect( result3 ).toBe( 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>\'s/foo today' );
380420
} );
381421

382422
} );
@@ -444,9 +484,28 @@ describe( "Autolinker", function() {
444484
} );
445485

446486

447-
it( "should not include the '?' char if it is at the end of the URL", function() {
448-
var result = autolinker.link( "Joe went to yahoo.com? today" );
449-
expect( result ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>? today' );
487+
it( "should not include [?!:,.;] chars if at the end of the URL", function() {
488+
var result1 = autolinker.link( "Joe went to yahoo.com? today" );
489+
expect( result1 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>? today' );
490+
var result2 = autolinker.link( "Joe went to yahoo.com! today" );
491+
expect( result2 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>! today' );
492+
var result3 = autolinker.link( "Joe went to yahoo.com: today" );
493+
expect( result3 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>: today' );
494+
var result4 = autolinker.link( "Joe went to yahoo.com, today" );
495+
expect( result4 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>, today' );
496+
var result5 = autolinker.link( "Joe went to yahoo.com. today" );
497+
expect( result5 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>. today' );
498+
var result6 = autolinker.link( "Joe went to yahoo.com; today" );
499+
expect( result6 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>; today' );
500+
} );
501+
502+
it( "should exclude invalid chars after TLD", function() {
503+
var result1 = autolinker.link( "Joe went to yahoo.com's today" );
504+
expect( result1 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>\'s today' );
505+
var result2 = autolinker.link( "Joe went to yahoo.com/foo's today" );
506+
expect( result2 ).toBe( 'Joe went to <a href="http://yahoo.com/foo\'s">yahoo.com/foo\'s</a> today' );
507+
var result3 = autolinker.link( "Joe went to yahoo.com's/foo today" );
508+
expect( result3 ).toBe( 'Joe went to <a href="http://yahoo.com">yahoo.com</a>\'s/foo today' );
450509
} );
451510

452511
} );

0 commit comments

Comments
 (0)