Use publicsuffixlist for base domain extraction. Fix #276.

This commit is contained in:
FelisCatus 2015-02-27 20:17:05 +08:00
parent 5a7e365896
commit 0ae801e716
3 changed files with 25 additions and 26 deletions

View File

@ -18,6 +18,7 @@
},
"dependencies": {
"ipv6": "beaugunderson/javascript-ipv6",
"tldjs": "^1.5.2",
"uglify-js": "^2.4.15"
},
"browser": {

View File

@ -39,19 +39,23 @@ class AttachedCache
exports.AttachedCache = AttachedCache
# NOTE(review): this span looks like diff output whose +/- markers and
# indentation were stripped, so lines of an older segment-counting
# getBaseDomain and of isIp appear interleaved. Confirm against the real file
# before relying on the statement order shown here.
exports.getBaseDomain = (domain) ->
return domain if domain.indexOf(':') > 0 # IPv6
# Module handle; a later getBaseDomain definition in this file calls tld.getDomain.
tld = require('tldjs')
# isIp(domain): judging by the `return true` / `return false` lines, reports
# whether the host looks like an IP literal -- it contains ':' somewhere after
# its first character, or its last character is an ASCII digit (char codes
# 48-57 are '0'-'9').
# NOTE(review): `indexOf(':') > 0` does not match a colon at index 0 (e.g. '::');
# confirm whether `>= 0` was intended.
exports.isIp = (domain) ->
return true if domain.indexOf(':') > 0 # IPv6
lastCharCode = domain.charCodeAt(domain.length - 1)
return domain if 48 <= lastCharCode <= 57 # IP address ending with number.
# Label-counting heuristic: hosts with at most two labels are returned as-is.
segments = domain.split('.')
if segments.length <= 2
return domain
# A leading 'www' label is dropped before counting again.
if segments[0] == 'www'
segments.shift()
len = segments.length
if len <= 2
return segments.join('.')
# A second-to-last label of one or two characters is treated as part of a
# two-label suffix, so three labels are kept; otherwise only the last two.
if segments[len - 2].length <= 2
return segments[len - 3] + '.' + segments[len - 2] + '.' + segments[len - 1]
else
return segments[len - 2] + '.' + segments[len - 1]
return true if 48 <= lastCharCode <= 57 # IP address ending with number.
return false
# Reduce a host name to its base domain.
# Hosts that exports.isIp reports as IP literals are handed back untouched.
# Everything else is looked up through tld.getDomain; when that lookup yields
# null or undefined, the input itself is returned.
exports.getBaseDomain = (domain) ->
  if exports.isIp(domain)
    return domain
  baseDomain = tld.getDomain(domain)
  if baseDomain? then baseDomain else domain
# Build a wildcard pattern for a host: hosts that exports.isIp reports as IP
# literals come back unchanged, anything else becomes '*.' followed by the
# result of exports.getBaseDomain.
exports.wildcardForDomain = (domain) ->
  if exports.isIp(domain)
    domain
  else
    '*.' + exports.getBaseDomain(domain)
Url = require('url')
# Build a wildcard pattern for the host part of a URL by parsing it with
# Url.parse and passing the resulting hostname to exports.wildcardForDomain.
# NOTE(review): whatever Url.parse reports as `hostname` is forwarded as-is;
# confirm callers only pass URLs that actually carry a host.
exports.wildcardForUrl = (url) ->
  {hostname} = Url.parse(url)
  exports.wildcardForDomain(hostname)

View File

@ -10,19 +10,13 @@ describe 'getBaseDomain', ->
getBaseDomain('example.com').should.equal('example.com')
getBaseDomain('e.test').should.equal('e.test')
getBaseDomain('a.b').should.equal('a.b')
# Each host starts with a 'www' label; the expected base domain is the host
# without that label.
it 'should ignore the leading www with domains with two or more levels', ->
  expectations = [
    ['www.example.com', 'example.com']
    ['www.e.test', 'e.test']
    ['www.a.b', 'a.b']
  ]
  for [host, expectedBase] in expectations
    getBaseDomain(host).should.equal(expectedBase)
  return
# NOTE(review): `it` headers below sit back to back and their assertions are
# not indented, which suggests diff output with the +/- markers stripped;
# confirm which assertions belong to which case before editing.
it 'should assume two-segment TLD if len(second segment from last) <= 2', ->
it 'should treat two-segment TLD as one component', ->
# Hosts under co.uk / co.jp are expected to keep three labels.
getBaseDomain('images.google.co.uk').should.equal('google.co.uk')
getBaseDomain('images.google.co.jp').should.equal('google.co.jp')
getBaseDomain('ab.de.ef.test').should.equal('de.ef.test')
it 'should assume one-segment TLD and keep two segments as base otherwise', ->
# Hosts under a single-label suffix are expected to keep two labels.
getBaseDomain('subdomain.example.com').should.equal('example.com')
getBaseDomain('some.site.example.net').should.equal('example.net')
getBaseDomain('some.site.abc.test').should.equal('abc.test')
getBaseDomain('ab.de.efg.test').should.equal('efg.test')
# A host that already is <name>.com.cn is expected to come back unchanged.
getBaseDomain('example.com.cn').should.equal('example.com.cn')
# Short second-to-last labels ('bc', 't') must not be taken for part of a
# two-label suffix: the expected base domain keeps only the last two labels.
it 'should not mistake short domains with two-segment TLDs', ->
  expectations = [
    ['a.bc.com', 'bc.com']
    ['i.t.co', 't.co']
  ]
  for [host, expectedBase] in expectations
    getBaseDomain(host).should.equal(expectedBase)
  return
it 'should not try to modify IP address literals', ->
getBaseDomain('127.0.0.1').should.equal('127.0.0.1')
getBaseDomain('[::1]').should.equal('[::1]')